Diffstat (limited to 'src/gallium/drivers')
396 files changed, 95599 insertions, 0 deletions
diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile
new file mode 100644
index 0000000000..6161cb6ff8
--- /dev/null
+++ b/src/gallium/drivers/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../..
+include $(TOP)/configs/current
+
+
+SUBDIRS = $(GALLIUM_DRIVER_DIRS)
+
+
+default: subdirs
+
+
+subdirs:
+	@for dir in $(SUBDIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE)) || exit 1 ; \
+		fi \
+	done
+
+
+clean:
+	rm -f `find . -name \*.[oa]`
diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile
new file mode 100644
index 0000000000..47aef7b05f
--- /dev/null
+++ b/src/gallium/drivers/cell/Makefile
@@ -0,0 +1,12 @@
+# Cell Gallium driver Makefile
+
+
+default:
+	( cd spu ; make )
+	( cd ppu ; make )
+
+
+
+clean:
+	( cd spu ; make clean )
+	( cd ppu ; make clean )
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
new file mode 100644
index 0000000000..1f6860da11
--- /dev/null
+++ b/src/gallium/drivers/cell/common.h
@@ -0,0 +1,376 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Types and tokens which are common to the SPU and PPU code.
+ */
+
+
+#ifndef CELL_COMMON_H
+#define CELL_COMMON_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+
+
+/** The standard assert macro doesn't seem to work reliably */
+#define ASSERT(x) \
+   if (!(x)) { \
+      ubyte *p = NULL; \
+      fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \
+              __FILE__, __LINE__, __FUNCTION__, #x);             \
+      *p = 0; \
+      exit(1); \
+   }
+
+
+
+#define JOIN(x, y) JOIN_AGAIN(x, y)
+#define JOIN_AGAIN(x, y) x ## y
+
+#define STATIC_ASSERT(e) \
+{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 
1 : -1];} + + + +/** for sanity checking */ +#define ASSERT_ALIGN16(ptr) \ +  ASSERT((((unsigned long) (ptr)) & 0xf) == 0); + + +/** round up value to next multiple of 4 */ +#define ROUNDUP4(k)  (((k) + 0x3) & ~0x3) + +/** round up value to next multiple of 8 */ +#define ROUNDUP8(k)  (((k) + 0x7) & ~0x7) + +/** round up value to next multiple of 16 */ +#define ROUNDUP16(k)  (((k) + 0xf) & ~0xf) + + +#define CELL_MAX_SPUS 8 + +#define CELL_MAX_SAMPLERS 4 +#define CELL_MAX_TEXTURE_LEVELS 12  /* 2k x 2k */ +#define CELL_MAX_CONSTANTS 32  /**< number of float[4] constants */ +#define CELL_MAX_WIDTH 1024    /**< max framebuffer width */ +#define CELL_MAX_HEIGHT 1024   /**< max framebuffer width */ + +#define TILE_SIZE 32 + + +/** + * The low byte of a mailbox word contains the command opcode. + * Remaining higher bytes are command specific. + */ +#define CELL_CMD_OPCODE_MASK 0xff + +#define CELL_CMD_EXIT                 1 +#define CELL_CMD_CLEAR_SURFACE        2 +#define CELL_CMD_FINISH               3 +#define CELL_CMD_RENDER               4 +#define CELL_CMD_BATCH                5 +#define CELL_CMD_RELEASE_VERTS        6 +#define CELL_CMD_STATE_FRAMEBUFFER   10 +#define CELL_CMD_STATE_FRAGMENT_OPS  11 +#define CELL_CMD_STATE_SAMPLER       12 +#define CELL_CMD_STATE_TEXTURE       13 +#define CELL_CMD_STATE_VERTEX_INFO   14 +#define CELL_CMD_STATE_VIEWPORT      15 +#define CELL_CMD_STATE_UNIFORMS      16 +#define CELL_CMD_STATE_VS_ARRAY_INFO 17 +#define CELL_CMD_STATE_BIND_VS       18 +#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 +#define CELL_CMD_STATE_ATTRIB_FETCH  20 +#define CELL_CMD_STATE_FS_CONSTANTS  21 +#define CELL_CMD_STATE_RASTERIZER    22 +#define CELL_CMD_VS_EXECUTE          23 +#define CELL_CMD_FLUSH_BUFFER_RANGE  24 +#define CELL_CMD_FENCE               25 + + +/** Command/batch buffers */ +#define CELL_NUM_BUFFERS 4 +#define CELL_BUFFER_SIZE (4*1024)  /**< 16KB would be the max */ + +#define CELL_BUFFER_STATUS_FREE 10 +#define CELL_BUFFER_STATUS_USED 20 + +/** Debug flags */ +#define CELL_DEBUG_CHECKER              (1 << 0) +#define CELL_DEBUG_ASM                  (1 << 1) +#define CELL_DEBUG_SYNC                 (1 << 2) +#define CELL_DEBUG_FRAGMENT_OPS         (1 << 3) +#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) +#define CELL_DEBUG_CMD                  (1 << 5) +#define CELL_DEBUG_CACHE                (1 << 6) + +#define CELL_FENCE_IDLE      0 +#define CELL_FENCE_EMITTED   1 +#define CELL_FENCE_SIGNALLED 2 + +#define CELL_FACING_FRONT    0 +#define CELL_FACING_BACK     1 + +struct cell_fence +{ +   /** There's a 16-byte status qword per SPU */ +   volatile uint status[CELL_MAX_SPUS][4]; +}; + +#ifdef __SPU__ +typedef vector unsigned int opcode_t; +#else +typedef unsigned int opcode_t[4]; +#endif + +/** + * Fence command sent to SPUs.  In response, the SPUs will write + * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory. + */ +struct cell_command_fence +{ +   opcode_t opcode;      /**< CELL_CMD_FENCE */ +   struct cell_fence *fence; +   uint32_t pad_[3]; +}; + + +/** + * Command to specify per-fragment operations state and generated code. + * Note that this is a variant-length structure, allocated with as  + * much memory as needed to hold the generated code; the "code" + * field *must* be the last field in the structure.  
Also, the entire + * length of the structure (including the variant code field) must be + * a multiple of 8 bytes; we require that this structure itself be + * a multiple of 8 bytes, and that the generated code also be a multiple + * of 8 bytes. + * + * Also note that the dsa, blend, blend_color fields are really only needed + * for the fallback/C per-pixel code.  They're not used when we generate + * dynamic SPU fragment code (which is the normal case), and will eventually + * be removed from this structure. + */ +struct cell_command_fragment_ops +{ +   opcode_t opcode;      /**< CELL_CMD_STATE_FRAGMENT_OPS */ + +   /* Fields for the fallback case */ +   struct pipe_depth_stencil_alpha_state dsa; +   struct pipe_blend_state blend; +   struct pipe_blend_color blend_color; + +   /* Fields for the generated SPU code */ +   unsigned total_code_size; +   unsigned front_code_index; +   unsigned back_code_index; +   /* this field has variant length, and must be the last field in  +    * the structure +    */ +   unsigned code[0]; +}; + + +/** Max instructions for fragment programs */ +#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 + +/** + * Command to send a fragment program to SPUs. + */ +struct cell_command_fragment_program +{ +   opcode_t opcode;      /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ +   uint num_inst;        /**< Number of instructions */ +   uint32_t pad[3]; +   unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; +}; + + +/** + * Tell SPUs about the framebuffer size, location + */ +struct cell_command_framebuffer +{ +   opcode_t opcode;     /**< CELL_CMD_STATE_FRAMEBUFFER */ +   int width, height; +   void *color_start, *depth_start; +   enum pipe_format color_format, depth_format; +   uint32_t pad_[2]; +}; + + +/** + * Tell SPUs about rasterizer state. + */ +struct cell_command_rasterizer +{ +   opcode_t opcode;    /**< CELL_CMD_STATE_RASTERIZER */ +   struct pipe_rasterizer_state rasterizer; +}; + + +/** + * Clear framebuffer to the given value/color. + */ +struct cell_command_clear_surface +{ +   opcode_t opcode;     /**< CELL_CMD_CLEAR_SURFACE */ +   uint surface; /**< Temporary: 0=color, 1=Z */ +   uint value; +   uint32_t pad[2]; +}; + + +/** + * Array info used by the vertex shader's vertex puller. + */ +struct cell_array_info +{ +   uint64_t base;      /**< Base address of the 0th element. */ +   uint attr;          /**< Attribute that this state is for. */ +   uint pitch;         /**< Byte pitch from one entry to the next. 
*/ +   uint size; +   uint function_offset; +}; + + +struct cell_attribute_fetch_code +{ +   uint64_t base; +   uint size; +}; + + +struct cell_buffer_range +{ +   uint64_t base; +   unsigned size; +}; + + +struct cell_shader_info +{ +   uint64_t declarations; +   uint64_t instructions; +   uint64_t  immediates; + +   unsigned num_outputs; +   unsigned num_declarations; +   unsigned num_instructions; +   unsigned num_immediates; +}; + + +#define SPU_VERTS_PER_BATCH 64 +struct cell_command_vs +{ +   opcode_t opcode;       /**< CELL_CMD_VS_EXECUTE */ +   uint64_t vOut[SPU_VERTS_PER_BATCH]; +   unsigned num_elts; +   unsigned elts[SPU_VERTS_PER_BATCH]; +   float plane[12][4]; +   unsigned nr_planes; +   unsigned nr_attrs; +}; + + +struct cell_command_render +{ +   opcode_t opcode;   /**< CELL_CMD_RENDER */ +   uint prim_type;    /**< PIPE_PRIM_x */ +   uint num_verts; +   uint vertex_size;  /**< bytes per vertex */ +   uint num_indexes; +   uint vertex_buf;  /**< which cell->buffer[] contains the vertex data */ +   float xmin, ymin, xmax, ymax;  /* XXX another dummy field */ +   uint min_index; +   boolean inline_verts; +   uint32_t pad_[1]; +}; + + +struct cell_command_release_verts +{ +   opcode_t opcode;         /**< CELL_CMD_RELEASE_VERTS */ +   uint vertex_buf;    /**< in [0, CELL_NUM_BUFFERS-1] */ +   uint32_t pad_[3]; +}; + + +struct cell_command_sampler +{ +   opcode_t opcode;         /**< CELL_CMD_STATE_SAMPLER */ +   uint unit; +   struct pipe_sampler_state state; +   uint32_t pad_[1]; +}; + + +struct cell_command_texture +{ +   opcode_t opcode;     /**< CELL_CMD_STATE_TEXTURE */ +   uint target;         /**< PIPE_TEXTURE_x */ +   uint unit; +   void *start[CELL_MAX_TEXTURE_LEVELS];   /**< Address in main memory */ +   ushort width[CELL_MAX_TEXTURE_LEVELS]; +   ushort height[CELL_MAX_TEXTURE_LEVELS]; +   ushort depth[CELL_MAX_TEXTURE_LEVELS]; +}; + + +#define MAX_SPU_FUNCTIONS 12 +/** + * Used to tell the PPU about the address of particular functions in the + * SPU's address space. + */ +struct cell_spu_function_info +{ +   uint num; +   char names[MAX_SPU_FUNCTIONS][16]; +   uint addrs[MAX_SPU_FUNCTIONS]; +   char pad[12];   /**< Pad struct to multiple of 16 bytes (256 currently) */ +}; + + +/** This is the object passed to spe_create_thread() */ +struct cell_init_info +{ +   unsigned id; +   unsigned num_spus; +   unsigned debug_flags;  /**< mask of CELL_DEBUG_x flags */ +   float inv_timebase;    /**< 1.0/timebase, for perf measurement */ + +   /** Buffers for command batches, vertex/index data */ +   ubyte *buffers[CELL_NUM_BUFFERS]; +   uint *buffer_status;  /**< points at cell_context->buffer_status */ + +   struct cell_spu_function_info *spu_functions; +} ALIGN16_ATTRIB; + + +#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile new file mode 100644 index 0000000000..c92f8e5cba --- /dev/null +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -0,0 +1,86 @@ +# Gallium3D Cell driver: PPU code + +# This makefile builds the libcell.a library which gets pulled into +# the main libGL.so library + + +TOP = ../../../../.. +include $(TOP)/configs/current + + +# This is the "top-level" cell PPU driver code, will get pulled into libGL.so +# by the winsys Makefile. +CELL_LIB = ../libcell.a + + +# This is the SPU code.  We'd like to be able to put this into the libcell.a +# archive with the PPU code, but nesting .a libs doesn't seem to work. 
+# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile +SPU_CODE_MODULE = ../spu/g3d_spu.a + + +SOURCES = \ +	cell_batch.c \ +	cell_clear.c \ +	cell_context.c \ +	cell_draw_arrays.c \ +	cell_fence.c \ +	cell_flush.c \ +	cell_gen_fragment.c \ +	cell_gen_fp.c \ +	cell_state_derived.c \ +	cell_state_emit.c \ +	cell_state_shader.c \ +	cell_pipe_state.c \ +	cell_screen.c \ +	cell_state_vertex.c \ +	cell_spu.c \ +	cell_surface.c \ +	cell_texture.c \ +	cell_vbuf.c \ +	cell_vertex_fetch.c \ +	cell_vertex_shader.c + + +OBJECTS = $(SOURCES:.c=.o) \ + +INCLUDE_DIRS = \ +	-I$(TOP)/src/mesa \ +	-I$(TOP)/src/gallium/include \ +	-I$(TOP)/src/gallium/auxiliary \ +	-I$(TOP)/src/gallium/drivers + +.c.o: +	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +.c.s: +	$(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +default: $(CELL_LIB) + + +$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) +#	ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work +	ar -ru $(CELL_LIB) $(OBJECTS) + +#$(PROG): $(PPU_OBJECTS) +#	$(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) + + + +clean: +	rm -f *.o *~ $(CELL_LIB) + + + +depend: $(SOURCES) +	rm -f depend +	touch depend +	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + + + diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c new file mode 100644 index 0000000000..fe144f8b84 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -0,0 +1,260 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_spu.h" + + + +/** + * Search the buffer pool for an empty/free buffer and return its index. + * Buffers are used for storing vertex data, state and commands which + * will be sent to the SPUs. + * If no empty buffers are available, wait for one. 
+ * \return buffer index in [0, CELL_NUM_BUFFERS-1] + */ +uint +cell_get_empty_buffer(struct cell_context *cell) +{ +   static uint prev_buffer = 0; +   uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS; +   uint tries = 0; + +   /* Find a buffer that's marked as free by all SPUs */ +   while (1) { +      uint spu, num_free = 0; + +      for (spu = 0; spu < cell->num_spus; spu++) { +         if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { +            num_free++; + +            if (num_free == cell->num_spus) { +               /* found a free buffer, now mark status as used */ +               for (spu = 0; spu < cell->num_spus; spu++) { +                  cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; +               } +               /* +               printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); +               */ +               prev_buffer = buf; + +               /* release tex buffer associated w/ prev use of this batch buf */ +               cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); + +               return buf; +            } +         } +         else { +            break; +         } +      } + +      /* try next buf */ +      buf = (buf + 1) % CELL_NUM_BUFFERS; + +      tries++; +      if (tries == 100) { +         /* +         printf("PPU WAITING for buffer...\n"); +         */ +      } +   } +} + + +/** + * Append a fence command to the current batch buffer. + * Note that we're sure there's always room for this because of the + * adjusted size check in cell_batch_free_space(). + */ +static void +emit_fence(struct cell_context *cell) +{ +   const uint batch = cell->cur_batch; +   const uint size = cell->buffer_size[batch]; +   struct cell_command_fence *fence_cmd; +   struct cell_fence *fence = &cell->fenced_buffers[batch].fence; +   uint i; + +   /* set fence status to emitted, not yet signalled */ +   for (i = 0; i < cell->num_spus; i++) { +      fence->status[i][0] = CELL_FENCE_EMITTED; +   } + +   STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0); +   ASSERT(size % 16 == 0); +   ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); + +   fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); +   fence_cmd->opcode[0] = CELL_CMD_FENCE; +   fence_cmd->fence = fence; + +   /* update batch buffer size */ +   cell->buffer_size[batch] = size + sizeof(struct cell_command_fence); +} + + +/** + * Flush the current batch buffer to the SPUs. + * An empty buffer will be found and set as the new current batch buffer + * for subsequent commands/data. + */ +void +cell_batch_flush(struct cell_context *cell) +{ +   static boolean flushing = FALSE; +   uint batch = cell->cur_batch; +   uint size = cell->buffer_size[batch]; +   uint spu, cmd_word; + +   assert(!flushing); + +   if (size == 0) +      return; + +   /* Before we use this batch buffer, make sure any fenced texture buffers +    * are released. +    */ +   if (cell->fenced_buffers[batch].head) { +      emit_fence(cell); +      size = cell->buffer_size[batch]; +   } + +   flushing = TRUE; + +   assert(batch < CELL_NUM_BUFFERS); + +   /* +   printf("cell_batch_dispatch: buf %u at %p, size %u\n", +          batch, &cell->buffer[batch][0], size); +   */ +      +   /* +    * Build "BATCH" command and send to all SPUs. 
+    */ +   cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); + +   for (spu = 0; spu < cell->num_spus; spu++) { +      assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); +      send_mbox_message(cell_global.spe_contexts[spu], cmd_word); +   } + +   /* When the SPUs are done copying the buffer into their locals stores +    * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] +    * array indicating that the PPU can re-use the buffer. +    */ + +   batch = cell_get_empty_buffer(cell); + +   cell->buffer_size[batch] = 0;  /* empty */ +   cell->cur_batch = batch; + +   flushing = FALSE; +} + + +/** + * Return the number of bytes free in the current batch buffer. + */ +uint +cell_batch_free_space(const struct cell_context *cell) +{ +   uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; +   free -= sizeof(struct cell_command_fence); +   return free; +} + + +/** + * Allocate space in the current batch buffer for 'bytes' space. + * Bytes must be a multiple of 16 bytes.  Allocation will be 16 byte aligned. + * \return address in batch buffer to put data + */ +void * +cell_batch_alloc16(struct cell_context *cell, uint bytes) +{ +   void *pos; +   uint size; + +   ASSERT(bytes % 16 == 0); +   ASSERT(bytes <= CELL_BUFFER_SIZE); +   ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT +   { +      uint spu; +      for (spu = 0; spu < cell->num_spus; spu++) { +         ASSERT(cell->buffer_status[spu][cell->cur_batch][0] +                 == CELL_BUFFER_STATUS_USED); +      } +   } +#endif + +   size = cell->buffer_size[cell->cur_batch]; + +   if (bytes > cell_batch_free_space(cell)) { +      cell_batch_flush(cell); +      size = 0; +   } + +   ASSERT(size % 16 == 0); +   ASSERT(size + bytes <= CELL_BUFFER_SIZE); + +   pos = (void *) (cell->buffer[cell->cur_batch] + size); + +   cell->buffer_size[cell->cur_batch] = size + bytes; + +   return pos; +} + + +/** + * One-time init of batch buffers. + */ +void +cell_init_batch_buffers(struct cell_context *cell) +{ +   uint spu, buf; + +   /* init command, vertex/index buffer info */ +   for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { +      cell->buffer_size[buf] = 0; + +      /* init batch buffer status values, +       * mark 0th buffer as used, rest as free. +       */ +      for (spu = 0; spu < cell->num_spus; spu++) { +         if (buf == 0) +            cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; +         else +            cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; +      } +   } +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h new file mode 100644 index 0000000000..290136031a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -0,0 +1,54 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_BATCH_H +#define CELL_BATCH_H + +#include "pipe/p_compiler.h" + + +struct cell_context; + + +extern uint +cell_get_empty_buffer(struct cell_context *cell); + +extern void +cell_batch_flush(struct cell_context *cell); + +extern uint +cell_batch_free_space(const struct cell_context *cell); + +extern void * +cell_batch_alloc16(struct cell_context *cell, uint bytes); + +extern void +cell_init_batch_buffers(struct cell_context *cell); + + +#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c new file mode 100644 index 0000000000..c2e276988c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -0,0 +1,123 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/** + * Authors + *  Brian Paul + */ + +#include <stdio.h> +#include <assert.h> +#include <stdint.h> +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "cell/common.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_state.h" + + +/** + * Convert packed pixel from one format to another. + */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, +              enum pipe_format dstFormat) +{ +   ubyte r, g, b, a; +   unsigned dstColor; + +   util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); +   util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + +   return dstColor; +} + + + +/** + * Called via pipe->clear() + */ +void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, +                   unsigned clearValue) +{ +   struct pipe_screen *screen = pipe->screen; +   struct cell_context *cell = cell_context(pipe); +   uint surfIndex; + +   if (cell->dirty) +      cell_update_derived(cell); + + +   if (!cell->cbuf_map[0]) +      cell->cbuf_map[0] = screen->surface_map(screen, ps, +                                              PIPE_BUFFER_USAGE_GPU_WRITE); + +   if (ps == cell->framebuffer.zsbuf) { +      /* clear z/stencil buffer */ +      surfIndex = 1; +   } +   else { +      /* clear color buffer */ +      surfIndex = 0; + +      if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { +         clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, +                                    ps->format); +      } +   } + + +   /* Build a CLEAR command and place it in the current batch buffer */ +   { +      STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); +      struct cell_command_clear_surface *clr +         = (struct cell_command_clear_surface *) +         cell_batch_alloc16(cell, sizeof(*clr)); +      clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; +      clr->surface = surfIndex; +      clr->value = clearValue; +   } + +   /* Technically, the surface's contents are now known and cleared, +    * so we could set the status to PIPE_SURFACE_STATUS_CLEAR.  But +    * it turns out it's quite painful to recognize when any particular +    * surface goes from PIPE_SURFACE_STATUS_CLEAR to  +    * PIPE_SURFACE_STATUS_DEFINED (i.e. with known contents), because +    * the drawing commands could be operating on numerous draw buffers, +    * which we'd have to iterate through to set all their stati... +    * For now, we cheat a bit and set the surface's status to DEFINED +    * right here.  Later we should revisit this and set the status to +    * CLEAR here, and find a better place to set the status to DEFINED. +    */ +   ps->status = PIPE_SURFACE_STATUS_DEFINED; +} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h new file mode 100644 index 0000000000..ff47d43f4c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_CLEAR_H +#define CELL_CLEAR_H + + +struct pipe_context; +struct pipe_surface; + + +extern void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, +                   unsigned clearValue); + + + +#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c new file mode 100644 index 0000000000..ae82ded334 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -0,0 +1,183 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/** + * Authors + *  Brian Paul + */ + + +#include <stdio.h> + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "util/u_memory.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_screen.h" + +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +#include "cell/common.h" +#include "cell_batch.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_fence.h" +#include "cell_flush.h" +#include "cell_state.h" +#include "cell_surface.h" +#include "cell_spu.h" +#include "cell_pipe_state.h" +#include "cell_texture.h" +#include "cell_vbuf.h" + + + +static void +cell_destroy_context( struct pipe_context *pipe ) +{ +   struct cell_context *cell = cell_context(pipe); + +   util_delete_keymap(cell->fragment_ops_cache, NULL); + +   cell_spu_exit(cell); + +   align_free(cell); +} + + +static struct draw_context * +cell_draw_create(struct cell_context *cell) +{ +   struct draw_context *draw = draw_create(); + +#if 0 /* broken */ +   if (getenv("GALLIUM_CELL_VS")) { +      /* plug in SPU-based vertex transformation code */ +      draw->shader_queue_flush = cell_vertex_shader_queue_flush; +      draw->driver_private = cell; +   } +#endif + +   return draw; +} + + +static const struct debug_named_value cell_debug_flags[] = { +   {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ +   {"asm", CELL_DEBUG_ASM},        /**< dump SPU asm code */ +   {"sync", CELL_DEBUG_SYNC},      /**< SPUs do synchronous DMA */ +   {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ +   {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ +   {"cmd", CELL_DEBUG_CMD},       /**< SPUs dump command buffer info */ +   {"cache", CELL_DEBUG_CACHE},   /**< report texture cache stats on exit */ +   {NULL, 0} +}; + + +struct pipe_context * +cell_create_context(struct pipe_screen *screen, +                    struct cell_winsys *cws) +{ +   struct cell_context *cell; +   uint i; + +   /* some fields need to be 16-byte aligned, so align the whole object */ +   cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); +   if (!cell) +      return NULL; + +   memset(cell, 0, sizeof(*cell)); + +   cell->winsys = cws; +   cell->pipe.winsys = screen->winsys; +   cell->pipe.screen = screen; +   cell->pipe.destroy = cell_destroy_context; + +   cell->pipe.clear = cell_clear_surface; +   cell->pipe.flush = cell_flush; + +#if 0 +   cell->pipe.begin_query = cell_begin_query; +   cell->pipe.end_query = cell_end_query; +   cell->pipe.wait_query = cell_wait_query; +#endif + +   cell_init_draw_functions(cell); +   cell_init_state_functions(cell); +   cell_init_shader_functions(cell); +   cell_init_surface_functions(cell); +   cell_init_vertex_functions(cell); + +   cell->draw = cell_draw_create(cell); + +   /* Create cache of fragment ops generated code */ +   cell->fragment_ops_cache = +      util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); + +   cell_init_vbuf(cell); + +   draw_set_rasterize_stage(cell->draw, cell->vbuf); + +   /* convert all points/lines to tris for the time being */ +   draw_wide_point_threshold(cell->draw, 0.0); +   draw_wide_line_threshold(cell->draw, 0.0); + +   /* get env vars or read config file to get debug flags */ +   cell->debug_flags = debug_get_flags_option("CELL_DEBUG",  +                                  
            cell_debug_flags,  +                                              0 ); + +   for (i = 0; i < CELL_NUM_BUFFERS; i++) +      cell_fence_init(&cell->fenced_buffers[i].fence); + + +   /* +    * SPU stuff +    */ +   /* This call only works with SDK 3.0.  Anyone still using 2.1??? */ +   cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); +   cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); +   if (cell->debug_flags) { +      printf("Cell: found %d Cell(s) with %u SPUs\n", +             cell->num_cells, cell->num_spus); +   } +   if (getenv("CELL_NUM_SPUS")) { +      cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); +      assert(cell->num_spus > 0); +   } + +   cell_start_spus(cell); + +   cell_init_batch_buffers(cell); + +   /* make sure SPU initializations are done before proceeding */ +   cell_flush_int(cell, CELL_FLUSH_WAIT); + +   return &cell->pipe; +} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h new file mode 100644 index 0000000000..eb1397bb3f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -0,0 +1,205 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_CONTEXT_H +#define CELL_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vbuf.h" +#include "cell_winsys.h" +#include "cell/common.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_keymap.h" + + +struct cell_vbuf_render; + + +/** + * Cell vertex shader state, subclass of pipe_shader_state. + */ +struct cell_vertex_shader_state +{ +   struct pipe_shader_state shader; +   struct tgsi_shader_info info; +   void *draw_data; +}; + + +/** + * Cell fragment shader state, subclass of pipe_shader_state. + */ +struct cell_fragment_shader_state +{ +   struct pipe_shader_state shader; +   struct tgsi_shader_info info; +   struct spe_function code; +   void *data; +}; + + +/** + * Key for mapping per-fragment state to cached SPU machine code. 
+ *  keymap(cell_fragment_ops_key) => cell_command_fragment_ops + */ +struct cell_fragment_ops_key +{ +   struct pipe_blend_state blend; +   struct pipe_blend_color blend_color; +   struct pipe_depth_stencil_alpha_state dsa; +   enum pipe_format color_format; +   enum pipe_format zs_format; +}; + + +struct cell_buffer_node; + +/** + * Fenced buffer list.  List of buffers which can be unreferenced after + * the fence has been executed/signalled. + */ +struct cell_buffer_list +{ +   struct cell_fence fence ALIGN16_ATTRIB; +   struct cell_buffer_node *head; +}; + + +/** + * Per-context state, subclass of pipe_context. + */ +struct cell_context +{ +   struct pipe_context pipe; + +   struct cell_winsys *winsys; + +   const struct pipe_blend_state *blend; +   const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; +   uint num_samplers; +   const struct pipe_depth_stencil_alpha_state *depth_stencil; +   const struct pipe_rasterizer_state *rasterizer; +   const struct cell_vertex_shader_state *vs; +   const struct cell_fragment_shader_state *fs; + +   struct spe_function logic_op; + +   struct pipe_blend_color blend_color; +   struct pipe_clip_state clip; +   struct pipe_constant_buffer constants[2]; +   struct pipe_framebuffer_state framebuffer; +   struct pipe_poly_stipple poly_stipple; +   struct pipe_scissor_state scissor; +   struct cell_texture *texture[PIPE_MAX_SAMPLERS]; +   uint num_textures; +   struct pipe_viewport_state viewport; +   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; +   uint num_vertex_buffers; +   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; +   uint num_vertex_elements; + +   ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; +   ubyte *zsbuf_map; + +   struct pipe_surface *tex_surf; +   uint *tex_map; + +   uint dirty; +   uint dirty_textures;  /* bitmask of texture units */ +   uint dirty_samplers;  /* bitmask of sampler units */ + +   /** Cache of code generated for per-fragment ops */ +   struct keymap *fragment_ops_cache; + +   /** The primitive drawing context */ +   struct draw_context *draw; +   struct draw_stage *render_stage; + +   /** For post-transformed vertex buffering: */ +   struct cell_vbuf_render *vbuf_render; +   struct draw_stage *vbuf; + +   struct vertex_info vertex_info; + +   /** Mapped constant buffers */ +   void *mapped_constants[PIPE_SHADER_TYPES]; + +   struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + +   uint num_cells, num_spus; + +   /** Buffers for command batches, vertex/index data */ +   uint buffer_size[CELL_NUM_BUFFERS]; +   ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + +   int cur_batch;  /**< which buffer is being filled w/ commands */ + +   /** [4] to ensure 16-byte alignment for each status word */ +   uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + + +   /** Associated with each command/batch buffer is a list of pipe_buffers +    * that are fenced.  When the last command in a buffer is executed, the +    * fence will be signalled, indicating that any pipe_buffers preceeding +    * that fence can be unreferenced (and probably freed). 
+    */ +   struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; + + +   struct spe_function attrib_fetch; +   unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; + +   unsigned debug_flags; +}; + + + + +static INLINE struct cell_context * +cell_context(struct pipe_context *pipe) +{ +   return (struct cell_context *) pipe; +} + + +extern struct pipe_context * +cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws); + +extern void +cell_vertex_shader_queue_flush(struct draw_context *draw); + + +/* XXX find a better home for this */ +extern void cell_update_vertex_fetch(struct draw_context *draw); + + +#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c new file mode 100644 index 0000000000..644496db40 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -0,0 +1,191 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Author: + *    Brian Paul + *    Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_state.h" +#include "cell_flush.h" + +#include "draw/draw_context.h" + + + +static void +cell_map_constant_buffers(struct cell_context *sp) +{ +   struct pipe_winsys *ws = sp->pipe.winsys; +   uint i; +   for (i = 0; i < 2; i++) { +      if (sp->constants[i].buffer && sp->constants[i].buffer->size) { +         sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, +                                                   PIPE_BUFFER_USAGE_CPU_READ); +         cell_flush_buffer_range(sp, sp->mapped_constants[i],  +                                 sp->constants[i].buffer->size); +      } +   } + +   draw_set_mapped_constant_buffer(sp->draw, +                                   sp->mapped_constants[PIPE_SHADER_VERTEX], +                                   sp->constants[PIPE_SHADER_VERTEX].buffer->size); +} + +static void +cell_unmap_constant_buffers(struct cell_context *sp) +{ +   struct pipe_winsys *ws = sp->pipe.winsys; +   uint i; +   for (i = 0; i < 2; i++) { +      if (sp->constants[i].buffer && sp->constants[i].buffer->size) +         ws->buffer_unmap(ws, sp->constants[i].buffer); +      sp->mapped_constants[i] = NULL; +   } +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ +static boolean +cell_draw_range_elements(struct pipe_context *pipe, +                         struct pipe_buffer *indexBuffer, +                         unsigned indexSize, +                         unsigned min_index, +                         unsigned max_index, +                         unsigned mode, unsigned start, unsigned count) +{ +   struct cell_context *sp = cell_context(pipe); +   struct draw_context *draw = sp->draw; +   unsigned i; + +   if (sp->dirty) +      cell_update_derived( sp ); + +#if 0 +   cell_map_surfaces(sp); +#endif +   cell_map_constant_buffers(sp); + +   /* +    * Map vertex buffers +    */ +   for (i = 0; i < sp->num_vertex_buffers; i++) { +      void *buf = pipe_buffer_map(pipe->screen, +                                           sp->vertex_buffer[i].buffer, +                                           PIPE_BUFFER_USAGE_CPU_READ); +      cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); +      draw_set_mapped_vertex_buffer(draw, i, buf); +   } +   /* Map index buffer, if present */ +   if (indexBuffer) { +      void *mapped_indexes = pipe_buffer_map(pipe->screen, +                                                      indexBuffer, +                                                      PIPE_BUFFER_USAGE_CPU_READ); +      draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); +   } +   else { +      /* no index/element buffer */ +      draw_set_mapped_element_buffer(draw, 0, NULL); +   } + + +   /* draw! 
*/ +   draw_arrays(draw, mode, start, count); + +   /* +    * unmap vertex/index buffers - will cause draw module to flush +    */ +   for (i = 0; i < sp->num_vertex_buffers; i++) { +      draw_set_mapped_vertex_buffer(draw, i, NULL); +      pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); +   } +   if (indexBuffer) { +      draw_set_mapped_element_buffer(draw, 0, NULL); +      pipe_buffer_unmap(pipe->screen, indexBuffer); +   } + +   /* Note: leave drawing surfaces mapped */ +   cell_unmap_constant_buffers(sp); + +   return TRUE; +} + + +static boolean +cell_draw_elements(struct pipe_context *pipe, +                   struct pipe_buffer *indexBuffer, +                   unsigned indexSize, +                   unsigned mode, unsigned start, unsigned count) +{ +   return cell_draw_range_elements( pipe, indexBuffer, +                                    indexSize, +                                    0, 0xffffffff, +                                    mode, start, count ); +} + + +static boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, +                     unsigned start, unsigned count) +{ +   return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + + +static void +cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ +   struct cell_context *cell = cell_context(pipe); +   draw_set_edgeflags(cell->draw, edgeflags); +} + + + +void +cell_init_draw_functions(struct cell_context *cell) +{ +   cell->pipe.draw_arrays = cell_draw_arrays; +   cell->pipe.draw_elements = cell_draw_elements; +   cell->pipe.draw_range_elements = cell_draw_range_elements; +   cell->pipe.set_edgeflags = cell_set_edgeflags; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h new file mode 100644 index 0000000000..148873aa67 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -0,0 +1,36 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#ifndef CELL_DRAW_ARRAYS_H +#define CELL_DRAW_ARRAYS_H + + +extern void +cell_init_draw_functions(struct cell_context *cell); + + +#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c new file mode 100644 index 0000000000..867b5dcaa0 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -0,0 +1,168 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#include <unistd.h> +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_texture.h" + + +void +cell_fence_init(struct cell_fence *fence) +{ +   uint i; +   ASSERT_ALIGN16(fence->status); +   for (i = 0; i < CELL_MAX_SPUS; i++) { +      fence->status[i][0] = CELL_FENCE_IDLE; +   } +} + + +boolean +cell_fence_signalled(const struct cell_context *cell, +                     const struct cell_fence *fence) +{ +   uint i; +   for (i = 0; i < cell->num_spus; i++) { +      if (fence->status[i][0] != CELL_FENCE_SIGNALLED) +         return FALSE; +      /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/ +   } +   return TRUE; +} + + +void +cell_fence_finish(const struct cell_context *cell, +                  const struct cell_fence *fence) +{ +   while (!cell_fence_signalled(cell, fence)) { +      usleep(10); +   } + +#ifdef DEBUG +   { +      uint i; +      for (i = 0; i < cell->num_spus; i++) { +         assert(fence->status[i][0] == CELL_FENCE_SIGNALLED); +      } +   } +#endif +} + + + + +struct cell_buffer_node +{ +   struct pipe_buffer *buffer; +   struct cell_buffer_node *next; +}; + + +static void +cell_add_buffer_to_list(struct cell_context *cell, +                        struct cell_buffer_list *list, +                        struct pipe_buffer *buffer) +{ +   struct pipe_screen *ps = cell->pipe.screen; +   struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); +   /* create new list node which references the buffer, insert at head */ +   if (node) { +      pipe_buffer_reference(ps, &node->buffer, buffer); +      node->next = list->head; +      list->head = node; +   } +} + + +/** + * Wait for completion of the given fence, then unreference any buffers + * on the list. + * This typically unrefs/frees texture buffers after any rendering which uses + * them has completed. + */ +void +cell_free_fenced_buffers(struct cell_context *cell, +                         struct cell_buffer_list *list) +{ +   if (list->head) { +      struct pipe_screen *ps = cell->pipe.screen; +      struct cell_buffer_node *node; + +      cell_fence_finish(cell, &list->fence); + +      /* traverse the list, unreferencing buffers, freeing nodes */ +      node = list->head; +      while (node) { +         struct cell_buffer_node *next = node->next; +         assert(node->buffer); +         pipe_buffer_unmap(ps, node->buffer); +#if 0 +         printf("Unref buffer %p\n", node->buffer); +         if (node->buffer->refcount == 1) +            printf("   Delete!\n"); +#endif +         pipe_buffer_reference(ps, &node->buffer, NULL); +         FREE(node); +         node = next; +      } +      list->head = NULL; +   } +} + + +/** + * This should be called for each render command. + * Any texture buffers that are current bound will be added to a fenced + * list to be freed later when the fence is executed/signalled. 
+ */ +void +cell_add_fenced_textures(struct cell_context *cell) +{ +   struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; +   uint i; + +   for (i = 0; i < cell->num_textures; i++) { +      struct cell_texture *ct = cell->texture[i]; +      if (ct) { +         uint level; +         for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { +            if (ct->tiled_buffer[level]) { +#if 0 +               printf("Adding texture %p buffer %p to list\n", +                      ct, ct->tiled_buffer[level]); +#endif +               cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]); +            } +         } +      } +   } +} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h new file mode 100644 index 0000000000..536b4ba411 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -0,0 +1,57 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_FENCE_H +#define CELL_FENCE_H + + +extern void +cell_fence_init(struct cell_fence *fence); + + +extern boolean +cell_fence_signalled(const struct cell_context *cell, +                     const struct cell_fence *fence); + + +extern void +cell_fence_finish(const struct cell_context *cell, +                  const struct cell_fence *fence); + + + +extern void +cell_free_fenced_buffers(struct cell_context *cell, +                         struct cell_buffer_list *list); + + +extern void +cell_add_fenced_textures(struct cell_context *cell); + + +#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c new file mode 100644 index 0000000000..8275c9dc9c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -0,0 +1,112 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_render.h" +#include "draw/draw_context.h" + + +/** + * Called via pipe->flush() + */ +void +cell_flush(struct pipe_context *pipe, unsigned flags, +           struct pipe_fence_handle **fence) +{ +   struct cell_context *cell = cell_context(pipe); + +   if (fence) { +      *fence = NULL; +      /* XXX: Implement real fencing */ +      flags |= CELL_FLUSH_WAIT; +   } + +   if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE)) +      flags |= CELL_FLUSH_WAIT; + +   draw_flush( cell->draw ); +   cell_flush_int(cell, flags); +} + + +/** + * Cell internal flush function.  Send the current batch buffer to all SPUs. + * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. 
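
cell_flush_int() below realizes this by appending a CELL_CMD_FINISH opcode to the batch and then, when CELL_FLUSH_WAIT is set, waiting for each SPU to mail the opcode back. A self-contained model of that control flow, with the SPU mailboxes replaced by a plain array and the names made up for illustration:

#include <assert.h>
#include <stdio.h>

/* Model of the flush handshake; the per-SPU mailboxes are a plain array
 * here, while the driver talks to real SPE contexts.
 */
#define CMD_FINISH  3
#define FLUSH_WAIT  0x80000000u
#define NUM_SPUS    6

static unsigned mailbox[NUM_SPUS];

static void spus_run_batch(void)
{
   /* pretend each SPU processed the batch and acknowledged the FINISH */
   unsigned i;
   for (i = 0; i < NUM_SPUS; i++)
      mailbox[i] = CMD_FINISH;
}

static void flush(unsigned flags)
{
   if (flags & FLUSH_WAIT) {
      /* a CELL_CMD_FINISH-style opcode would be appended to the batch here */
   }

   spus_run_batch();                  /* hand the batch to the SPUs */

   if (flags & FLUSH_WAIT) {
      unsigned i;
      for (i = 0; i < NUM_SPUS; i++)
         assert(mailbox[i] == CMD_FINISH);   /* block until every SPU acks */
   }
}

int main(void)
{
   flush(FLUSH_WAIT);
   printf("all SPUs idle\n");
   return 0;
}
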
+ * \param flags  bitmask of flags CELL_FLUSH_WAIT, or zero + */ +void +cell_flush_int(struct cell_context *cell, unsigned flags) +{ +   static boolean flushing = FALSE;  /* recursion catcher */ +   uint i; + +   ASSERT(!flushing); +   flushing = TRUE; + +   if (flags & CELL_FLUSH_WAIT) { +      STATIC_ASSERT(sizeof(opcode_t) % 16 == 0); +      opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t)); +      *cmd[0] = CELL_CMD_FINISH; +   } + +   cell_batch_flush(cell); + +#if 0 +   /* Send CMD_FINISH to all SPUs */ +   for (i = 0; i < cell->num_spus; i++) { +      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); +   } +#endif + +   if (flags & CELL_FLUSH_WAIT) { +      /* Wait for ack */ +      for (i = 0; i < cell->num_spus; i++) { +         uint k = wait_mbox_message(cell_global.spe_contexts[i]); +         assert(k == CELL_CMD_FINISH); +      } +   } + +   flushing = FALSE; +} + + +void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, +			unsigned size) +{ +   STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0); +   uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell,  +      sizeof(opcode_t) + sizeof(struct cell_buffer_range)); +   struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4]; +   batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; +   br->base = (uintptr_t) ptr; +   br->size = size; +} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h new file mode 100644 index 0000000000..509ae6239a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -0,0 +1,45 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#ifndef CELL_FLUSH +#define CELL_FLUSH + +#define CELL_FLUSH_WAIT 0x80000000 + +extern void +cell_flush(struct pipe_context *pipe, unsigned flags, +           struct pipe_fence_handle **fence); + +extern void +cell_flush_int(struct cell_context *cell, unsigned flags); + +extern void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, +			unsigned size); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c new file mode 100644 index 0000000000..5a889a6119 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -0,0 +1,2046 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2009 VMware, Inc.  All rights reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + + +/** + * Generate SPU fragment program/shader code. + * + * Note that we generate SOA-style code here.  So each TGSI instruction + * operates on four pixels (and is translated into four SPU instructions, + * generally speaking). + * + * \author Brian Paul + */ + +#include <math.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fp.h" + + +#define MAX_TEMPS 16 +#define MAX_IMMED  8 + +#define CHAN_X  0 +#define CHAN_Y  1 +#define CHAN_Z  2 +#define CHAN_W  3 + +/** + * Context needed during code generation. 
+ */ +struct codegen +{ +   struct cell_context *cell; +   int inputs_reg;      /**< 1st function parameter */ +   int outputs_reg;     /**< 2nd function parameter */ +   int constants_reg;   /**< 3rd function parameter */ +   int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ +   int imm_regs[MAX_IMMED][4];  /**< maps TGSI immediates to SPE registers */ + +   int num_imm;  /**< number of immediates */ + +   int one_reg;         /**< register containing {1.0, 1.0, 1.0, 1.0} */ + +   int addr_reg;        /**< address register, integer values */ + +   /** Per-instruction temps / intermediate temps */ +   int num_itemps; +   int itemps[12]; + +   /** Current IF/ELSE/ENDIF nesting level */ +   int if_nesting; +   /** Current BGNLOOP/ENDLOOP nesting level */ +   int loop_nesting; +   /** Location of start of current loop */ +   int loop_start; + +   /** Index of if/conditional mask register */ +   int cond_mask_reg; +   /** Index of loop mask register */ +   int loop_mask_reg; + +   /** Index of master execution mask register */ +   int exec_mask_reg; + +   /** KIL mask: indicates which fragments have been killed */ +   int kill_mask_reg; + +   int frame_size;  /**< Stack frame size, in words */ + +   struct spe_function *f; +   boolean error; +}; + + +/** + * Allocate an intermediate temporary register. + */ +static int +get_itemp(struct codegen *gen) +{ +   int t = spe_allocate_available_register(gen->f); +   assert(gen->num_itemps < Elements(gen->itemps)); +   gen->itemps[gen->num_itemps++] = t; +   return t; +} + +/** + * Free all intermediate temporary registers.  To be called after each + * instruction has been emitted. + */ +static void +free_itemps(struct codegen *gen) +{ +   int i; +   for (i = 0; i < gen->num_itemps; i++) { +      spe_release_register(gen->f, gen->itemps[i]); +   } +   gen->num_itemps = 0; +} + + +/** + * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. + * The register is allocated and initialized upon the first call. + */ +static int +get_const_one_reg(struct codegen *gen) +{ +   if (gen->one_reg <= 0) { +      gen->one_reg = spe_allocate_available_register(gen->f); + +      spe_indent(gen->f, 4); +      spe_comment(gen->f, -4, "init constant reg = 1.0:"); + +      /* one = {1.0, 1.0, 1.0, 1.0} */ +      spe_load_float(gen->f, gen->one_reg, 1.0f); + +      spe_indent(gen->f, -4); +   } + +   return gen->one_reg; +} + + +/** + * Return index of the address register. + * Used for indirect register loads/stores. + */ +static int +get_address_reg(struct codegen *gen) +{ +   if (gen->addr_reg <= 0) { +      gen->addr_reg = spe_allocate_available_register(gen->f); + +      spe_indent(gen->f, 4); +      spe_comment(gen->f, -4, "init address reg = 0:"); + +      /* init addr = {0, 0, 0, 0} */ +      spe_zero(gen->f, gen->addr_reg); + +      spe_indent(gen->f, -4); +   } + +   return gen->addr_reg; +} + + +/** + * Return index of the master execution mask. + * The register is allocated an initialized upon the first call. + * + * The master execution mask controls which pixels in a quad are + * modified, according to surrounding conditionals, loops, etc. 
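
Because the generated code is SOA, "not modifying a pixel" cannot be done with a branch; instead every result is merged into the destination with a bitwise select keyed on the execution mask (this is what store_dest_reg() does later with spe_selb). A scalar C model of that masked update, with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* One quad = four pixels handled SOA-style; a mask lane is all-ones when
 * the pixel is active and all-zeros when it is not, exactly the values the
 * SPE compare instructions produce.
 */
static void masked_store(uint32_t dst[4], const uint32_t src[4],
                         const uint32_t exec_mask[4])
{
   int i;
   for (i = 0; i < 4; i++) {
      /* bitwise select: keep dst where the mask is 0, take src where it is ~0 */
      dst[i] = (dst[i] & ~exec_mask[i]) | (src[i] & exec_mask[i]);
   }
}

int main(void)
{
   uint32_t dst[4]  = { 1, 2, 3, 4 };
   uint32_t src[4]  = { 9, 9, 9, 9 };
   uint32_t mask[4] = { 0xffffffffu, 0, 0xffffffffu, 0 };  /* pixels 0 and 2 active */

   masked_store(dst, src, mask);
   printf("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]);  /* 9 2 9 4 */
   return 0;
}
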
+ */ +static int +get_exec_mask_reg(struct codegen *gen) +{ +   if (gen->exec_mask_reg <= 0) { +      gen->exec_mask_reg = spe_allocate_available_register(gen->f); + +      /* XXX this may not be needed */ +      spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); +      spe_load_int(gen->f, gen->exec_mask_reg, ~0); +   } + +   return gen->exec_mask_reg; +} + + +/** Return index of the conditional (if/else) execution mask register */ +static int +get_cond_mask_reg(struct codegen *gen) +{ +   if (gen->cond_mask_reg <= 0) { +      gen->cond_mask_reg = spe_allocate_available_register(gen->f); +   } + +   return gen->cond_mask_reg; +} + + +/** Return index of the loop execution mask register */ +static int +get_loop_mask_reg(struct codegen *gen) +{ +   if (gen->loop_mask_reg <= 0) { +      gen->loop_mask_reg = spe_allocate_available_register(gen->f); +   } + +   return gen->loop_mask_reg; +} + + + +static boolean +is_register_src(struct codegen *gen, int channel, +                const struct tgsi_full_src_register *src) +{ +   int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); +   int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + +   if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { +      return FALSE; +   } +   if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || +       src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { +      return TRUE; +   } +   return FALSE; +} + +   +static boolean +is_memory_dst(struct codegen *gen, int channel, +              const struct tgsi_full_dst_register *dst) +{ +   if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { +      return TRUE; +   } +   else { +      return FALSE; +   } +} + +   +/** + * Return the index of the SPU temporary containing the named TGSI + * source register.  If the TGSI register is a TGSI_FILE_TEMPORARY we + * just return the corresponding SPE register.  If the TGIS register + * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register + * and emit an SPE load instruction. 
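
For the INPUT and CONSTANT cases below, the load address is computed in quadwords: component chan of register index lives at (index*4 + chan) quadwords, 16 bytes each, past the inputs/constants base pointer. A tiny C restatement of that offset calculation (the helper name is made up for illustration):

#include <stdio.h>

/* Each TGSI register is stored SOA as four 16-byte quadwords (x, y, z, w),
 * so component 'chan' of register 'index' sits (index*4 + chan) quadwords
 * from the base -- the offset that is later multiplied by 16 for spe_lqd().
 */
static unsigned component_byte_offset(unsigned index, unsigned chan)
{
   return (index * 4 + chan) * 16;
}

int main(void)
{
   /* INPUT[2].z -> (2*4 + 2) quadwords = 160 bytes past the inputs pointer */
   printf("%u bytes\n", component_byte_offset(2, 2));
   return 0;
}
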
+ */ +static int +get_src_reg(struct codegen *gen, +            int channel, +            const struct tgsi_full_src_register *src) +{ +   int reg = -1; +   int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); +   boolean reg_is_itemp = FALSE; +   uint sign_op; + +   assert(swizzle >= TGSI_SWIZZLE_X); +   assert(swizzle <= TGSI_EXTSWIZZLE_ONE); + +   if (swizzle == TGSI_EXTSWIZZLE_ONE) { +      /* Load const one float and early out */ +      reg = get_const_one_reg(gen); +   } +   else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { +      /* Load const zero float and early out */ +      reg = get_itemp(gen); +      spe_xor(gen->f, reg, reg, reg); +   } +   else { +      int index = src->SrcRegister.Index; + +      assert(swizzle < 4); + +      if (src->SrcRegister.Indirect) { +         /* XXX unfinished */ +      } + +      switch (src->SrcRegister.File) { +      case TGSI_FILE_TEMPORARY: +         reg = gen->temp_regs[index][swizzle]; +         break; +      case TGSI_FILE_INPUT: +         { +            /* offset is measured in quadwords, not bytes */ +            int offset = index * 4 + swizzle; +            reg = get_itemp(gen); +            reg_is_itemp = TRUE; +            /* Load:  reg = memory[(machine_reg) + offset] */ +            spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); +         } +         break; +      case TGSI_FILE_IMMEDIATE: +         reg = gen->imm_regs[index][swizzle]; +         break; +      case TGSI_FILE_CONSTANT: +         { +            /* offset is measured in quadwords, not bytes */ +            int offset = index * 4 + swizzle; +            reg = get_itemp(gen); +            reg_is_itemp = TRUE; +            /* Load:  reg = memory[(machine_reg) + offset] */ +            spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); +         } +         break; +      default: +         assert(0); +      } +   } + +   /* +    * Handle absolute value, negate or set-negative of src register. +    */ +   sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); +   if (sign_op != TGSI_UTIL_SIGN_KEEP) { +      /* +       * All sign ops are done by manipulating bit 31, the IEEE float sign bit. +       */ +      const int bit31mask_reg = get_itemp(gen); +      int result_reg; + +      if (reg_is_itemp) { +         /* re-use 'reg' for the result */ +         result_reg = reg; +      } +      else { +         /* alloc a new reg for the result */ +         result_reg = get_itemp(gen); +      } + +      /* mask with bit 31 set, the rest cleared */ +      spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + +      if (sign_op == TGSI_UTIL_SIGN_CLEAR) { +         spe_andc(gen->f, result_reg, reg, bit31mask_reg); +      } +      else if (sign_op == TGSI_UTIL_SIGN_SET) { +         spe_and(gen->f, result_reg, reg, bit31mask_reg); +      } +      else { +         assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); +         spe_xor(gen->f, result_reg, reg, bit31mask_reg); +      } + +      reg = result_reg; +   } + +   return reg; +} + + +/** + * Return the index of an SPE register to use for the given TGSI register. + * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the + * corresponding SPE register is returned.  If the TGSI register is + * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. + * See store_dest_reg() below... 
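
The sign-mode handling in get_src_reg() above works entirely on bit 31, the IEEE-754 sign bit: clearing it gives |x| and toggling it negates, while forcing it on (an OR with the same mask) is the conventional way to obtain -|x|. The same three operations in portable C, using memcpy for the type pun:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* The three TGSI sign modes as bit operations on the IEEE sign bit.
 * mode: 0 = SIGN_CLEAR (|x|), 1 = SIGN_SET (-|x|), 2 = SIGN_TOGGLE (-x).
 */
static float apply_sign_mode(float x, int mode)
{
   const uint32_t sign = 1u << 31;
   uint32_t bits;

   memcpy(&bits, &x, sizeof bits);       /* type-pun via memcpy, not a cast */
   if (mode == 0)      bits &= ~sign;    /* clear the sign bit  ->  |x| */
   else if (mode == 1) bits |=  sign;    /* force the sign bit  -> -|x| */
   else                bits ^=  sign;    /* flip the sign bit   ->  -x  */
   memcpy(&x, &bits, sizeof x);
   return x;
}

int main(void)
{
   printf("%g %g %g\n",
          apply_sign_mode(-2.5f, 0),    /*  2.5 */
          apply_sign_mode( 2.5f, 1),    /* -2.5 */
          apply_sign_mode( 2.5f, 2));   /* -2.5 */
   return 0;
}
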
+ */ +static int +get_dst_reg(struct codegen *gen, +            int channel, +            const struct tgsi_full_dst_register *dest) +{ +   int reg = -1; + +   switch (dest->DstRegister.File) { +   case TGSI_FILE_TEMPORARY: +      if (gen->if_nesting > 0 || gen->loop_nesting > 0) +         reg = get_itemp(gen); +      else +         reg = gen->temp_regs[dest->DstRegister.Index][channel]; +      break; +   case TGSI_FILE_OUTPUT: +      reg = get_itemp(gen); +      break; +   default: +      assert(0); +   } + +   return reg; +} + + +/** + * When a TGSI instruction is writing to an output register, this + * function emits the SPE store instruction to store the value_reg. + * \param value_reg  the SPE register containing the value to store. + *                   This would have been returned by get_dst_reg(). + */ +static void +store_dest_reg(struct codegen *gen, +               int value_reg, int channel, +               const struct tgsi_full_dst_register *dest) +{ +   /* +    * XXX need to implement dst reg clamping/saturation +    */ +#if 0 +   switch (inst->Instruction.Saturate) { +   case TGSI_SAT_NONE: +      break; +   case TGSI_SAT_ZERO_ONE: +      break; +   case TGSI_SAT_MINUS_PLUS_ONE: +      break; +   default: +      assert( 0 ); +   } +#endif + +   switch (dest->DstRegister.File) { +   case TGSI_FILE_TEMPORARY: +      if (gen->if_nesting > 0 || gen->loop_nesting > 0) { +         int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; +         int exec_reg = get_exec_mask_reg(gen); +         /* Mix d with new value according to exec mask: +          * d[i] = mask_reg[i] ? value_reg : d_reg +          */ +         spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); +      } +      else { +         /* we're not inside a condition or loop: do nothing special */ + +      } +      break; +   case TGSI_FILE_OUTPUT: +      { +         /* offset is measured in quadwords, not bytes */ +         int offset = dest->DstRegister.Index * 4 + channel; +         if (gen->if_nesting > 0 || gen->loop_nesting > 0) { +            int exec_reg = get_exec_mask_reg(gen); +            int curval_reg = get_itemp(gen); +            /* First read the current value from memory: +             * Load:  curval = memory[(machine_reg) + offset] +             */ +            spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); +            /* Mix curval with newvalue according to exec mask: +             * d[i] = mask_reg[i] ? 
value_reg : d_reg +             */ +            spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); +            /* Store: memory[(machine_reg) + offset] = curval */ +            spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); +         } +         else { +            /* Store: memory[(machine_reg) + offset] = reg */ +            spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); +         } +      } +      break; +   default: +      assert(0); +   } +} + + + +static void +emit_prologue(struct codegen *gen) +{ +   gen->frame_size = 1024; /* XXX temporary, should be dynamic */ + +   spe_comment(gen->f, 0, "Function prologue:"); + +   /* save $lr on stack     # stqd $lr,16($sp) */ +   spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + +   if (gen->frame_size >= 512) { +      /* offset is too large for ai instruction */ +      int offset_reg = spe_allocate_available_register(gen->f); +      int sp_reg = spe_allocate_available_register(gen->f); +      /* offset = -framesize */ +      spe_load_int(gen->f, offset_reg, -gen->frame_size); +      /* sp = $sp */ +      spe_move(gen->f, sp_reg, SPE_REG_SP); +      /* $sp = $sp + offset_reg */ +      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); +      /* save $sp in stack frame */ +      spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); +      /* clean up */ +      spe_release_register(gen->f, offset_reg); +      spe_release_register(gen->f, sp_reg); +   } +   else { +      /* save stack pointer    # stqd $sp,-frameSize($sp) */ +      spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + +      /* adjust stack pointer  # ai $sp,$sp,-frameSize */ +      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); +   } +} + + +static void +emit_epilogue(struct codegen *gen) +{ +   const int return_reg = 3; + +   spe_comment(gen->f, 0, "Function epilogue:"); + +   spe_comment(gen->f, 0, "return the killed mask"); +   if (gen->kill_mask_reg > 0) { +      /* shader called KIL, return the "alive" mask */ +      spe_move(gen->f, return_reg, gen->kill_mask_reg); +   } +   else { +      /* return {0,0,0,0} */ +      spe_load_uint(gen->f, return_reg, 0); +   } + +   spe_comment(gen->f, 0, "restore stack and return"); +   if (gen->frame_size >= 512) { +      /* offset is too large for ai instruction */ +      int offset_reg = spe_allocate_available_register(gen->f); +      /* offset = framesize */ +      spe_load_int(gen->f, offset_reg, gen->frame_size); +      /* $sp = $sp + offset */ +      spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); +      /* clean up */ +      spe_release_register(gen->f, offset_reg); +   } +   else { +      /* restore stack pointer    # ai $sp,$sp,frameSize */ +      spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); +   } + +   /* restore $lr              # lqd $lr,16($sp) */ +   spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + +   /* return from function call */ +   spe_bi(gen->f, SPE_REG_RA, 0, 0); +} + + +#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ +   for (ch = 0; ch < 4; ch++) \ +      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) + + +static boolean +emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch = 0, src_reg, addr_reg; + +   src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +   addr_reg = get_address_reg(gen); + +   /* convert float to int */ +   spe_cflts(gen->f, addr_reg, src_reg, 0); + +   free_itemps(gen); + +   return TRUE; +} + + +static boolean +emit_MOV(struct codegen *gen, const struct tgsi_full_instruction 
*inst) +{ +   int ch, src_reg[4], dst_reg[4]; + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && +          is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { +         /* special-case: register to memory store */ +         store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); +      } +      else { +         spe_move(gen->f, dst_reg[ch], src_reg[ch]); +         store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); +      } +   } + +   free_itemps(gen); + +   return TRUE; +} + +/** + * Emit binary operation + */ +static boolean +emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], s2_reg[4], d_reg[4]; + +   /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +   } + +   /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      /* Emit actual SPE instruction: d = s1 + s2 */ +      switch (inst->Instruction.Opcode) { +      case TGSI_OPCODE_ADD: +         spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); +         break; +      case TGSI_OPCODE_SUB: +         spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); +         break; +      case TGSI_OPCODE_MUL: +         spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); +         break; +      default: +         ; +      } +   } + +   /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   /* Free any intermediate temps we allocated */ +   free_itemps(gen); + +   return TRUE; +} + + +/** + * Emit multiply add.  See emit_ADD for comments. + */ +static boolean +emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +      s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } +   free_itemps(gen); +   return TRUE; +} + + +/** + * Emit linear interpolate.  See emit_ADD for comments. 
+ */ +static boolean +emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; + +   /* setup/get src/dst/temp regs */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +      s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      tmp_reg[ch] = get_itemp(gen); +   } + +   /* d = s3 + s1(s2 - s3) */ +   /* do all subtracts, then all fma, then all stores to better pipeline */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } +   free_itemps(gen); +   return TRUE; +} + + + +/** + * Emit reciprocal or recip sqrt. + */ +static boolean +emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], d_reg[4], tmp_reg[4]; + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      tmp_reg[ch] = get_itemp(gen); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { +         /* tmp = 1/s1 */ +         spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); +      } +      else { +         /* tmp = 1/sqrt(s1) */ +         spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); +      } +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      /* d = float_interp(s1, tmp) */ +      spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +/** + * Emit absolute value.  See emit_ADD for comments. + */ +static boolean +emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], d_reg[4]; +   const int bit31mask_reg = get_itemp(gen); + +   /* mask with bit 31 set, the rest cleared */   +   spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +   } + +   /* d = sign bit cleared in s1 */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + +/** + * Emit 3 component dot product.  See emit_ADD for comments. 
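
emit_LERP() above evaluates the interpolation as d = s3 + s1*(s2 - s3): one subtract and one fused multiply-add per channel instead of the textbook s1*s2 + (1 - s1)*s3, which is why the subtracts, fma's and stores can each be grouped for better pipelining. The identity in scalar C:

#include <stdio.h>

/* LERP written the way emit_LERP() schedules it: one subtract feeding one
 * multiply-add.  Here t, a, b stand for s1, s2, s3.
 */
static float lerp_fma(float t, float a, float b)
{
   return b + t * (a - b);        /* equal to t*a + (1 - t)*b */
}

int main(void)
{
   printf("%g\n", lerp_fma(0.25f, 10.0f, 20.0f));   /* 17.5 */
   return 0;
}
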
+ */ +static boolean +emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; +   int s1x_reg, s1y_reg, s1z_reg; +   int s2x_reg, s2y_reg, s2z_reg; +   int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + +   s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); +   s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); +   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); +   s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); +   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); +   s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + +   /* t0 = x0 * x1 */ +   spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); + +   /* t1 = y0 * y1 */ +   spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); + +   /* t0 = z0 * z1 + t0 */ +   spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); + +   /* t0 = t0 + t1 */ +   spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      spe_move(gen->f, d_reg, t0_reg); +      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + +/** + * Emit 4 component dot product.  See emit_ADD for comments. + */ +static boolean +emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; +   int s0x_reg, s0y_reg, s0z_reg, s0w_reg; +   int s1x_reg, s1y_reg, s1z_reg, s1w_reg; +   int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + +   s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); +   s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); +   s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); +   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); +   s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); +   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); +   s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); +   s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + +   /* t0 = x0 * x1 */ +   spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); + +   /* t1 = y0 * y1 */ +   spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); + +   /* t0 = z0 * z1 + t0 */ +   spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); + +   /* t1 = w0 * w1 + t1 */ +   spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); + +   /* t0 = t0 + t1 */ +   spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      spe_move(gen->f, d_reg, t0_reg); +      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + +/** + * Emit homogeneous dot product.  See emit_ADD for comments. 
+ */ +static boolean +emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   /* XXX rewrite this function to look more like DP3/DP4 */ +   int ch; +   int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); +   int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); +   int tmp_reg = get_itemp(gen); + +   /* t = x0 * x1 */ +   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + +   s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); +   s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); +   /* t = y0 * y1 + t */ +   spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + +   s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); +   s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); +   /* t = z0 * z1 + t */ +   spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + +   s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); +   /* t = w1 + t */ +   spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      spe_move(gen->f, d_reg, tmp_reg); +      store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + +/** + * Emit 3-component vector normalize. + */ +static boolean +emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; +   int src_reg[3]; +   int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + +   src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); +   src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); +   src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + +   /* t0 = x * x */ +   spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); + +   /* t1 = y * y */ +   spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]); + +   /* t0 = z * z + t0 */ +   spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg); + +   /* t0 = t0 + t1 */ +   spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + +   /* t1 = 1.0 / sqrt(t0) */ +   spe_frsqest(gen->f, t1_reg, t0_reg); +   spe_fi(gen->f, t1_reg, t0_reg, t1_reg); + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      /* dst = src[ch] * t1 */ +      spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); +      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +/** + * Emit cross product.  See emit_ADD for comments. 
+ */ +static boolean +emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); +   int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); +   int tmp_reg = get_itemp(gen); + +   /* t = z0 * y1 */ +   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + +   s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); +   s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); +   /* t = y0 * z1 - t */ +   spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + +   if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { +      store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); +   } + +   s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); +   s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); +   /* t = x0 * z1 */ +   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + +   s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); +   s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); +   /* t = z0 * x1 - t */ +   spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + +   if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { +      store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); +   } + +   s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); +   s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); +   /* t = y0 * x1 */ +   spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + +   s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); +   s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); +   /* t = x0 * y1 - t */ +   spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + +   if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { +      store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +/** + * Emit inequality instruction. + * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as + * the result but OpenGL/TGSI needs 0.0 and 1.0 results. + * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. 
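
That AND works because 0xffffffff preserves every bit of 1.0f while 0x00000000 yields +0.0f, so a compare mask becomes a 0.0/1.0 float directly; emit_inequality() below also uses an and-with-complement for the SGE/SLE cases. In plain C (the helper name is illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Turn a per-lane compare result (0x00000000 / 0xffffffff) into 0.0f / 1.0f
 * by ANDing it with the bit pattern of 1.0f.
 */
static float mask_to_float(uint32_t mask)
{
   const float one = 1.0f;
   uint32_t one_bits, out_bits;
   float out;

   memcpy(&one_bits, &one, sizeof one_bits);   /* 0x3f800000 */
   out_bits = mask & one_bits;
   memcpy(&out, &out_bits, sizeof out);
   return out;
}

int main(void)
{
   uint32_t gt = (2.0f > 1.0f) ? 0xffffffffu : 0u;   /* fcgt-style result */
   printf("%g %g\n", mask_to_float(gt), mask_to_float(~gt));   /* 1 0 */
   return 0;
}
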
+ */ +static boolean +emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; +   bool complement = FALSE; + +   one_reg = get_const_one_reg(gen); + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      switch (inst->Instruction.Opcode) { +      case TGSI_OPCODE_SGT: +         spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); +         break; +      case TGSI_OPCODE_SLT: +         spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); +         break; +      case TGSI_OPCODE_SGE: +         spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); +         complement = TRUE; +         break; +      case TGSI_OPCODE_SLE: +         spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); +         complement = TRUE; +         break; +      case TGSI_OPCODE_SEQ: +         spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); +         break; +      case TGSI_OPCODE_SNE: +         spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); +         complement = TRUE; +         break; +      default: +         assert(0); +      } +   } + +   /* convert d from 0x0/0xffffffff to 0.0/1.0 */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      /* d = d & one_reg */ +      if (complement) +         spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); +      else +         spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +/** + * Emit compare. + */ +static boolean +emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +      int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); +      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      int zero_reg = get_itemp(gen); +    +      spe_zero(gen->f, zero_reg); + +      /* d = (s1 < 0) ? s2 : s3 */ +      spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); +      spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); + +      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +      free_itemps(gen); +   } + +   return TRUE; +} + +/** + * Emit trunc.   + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], d_reg[4]; + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +   } + +   /* Convert float to int */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); +   } + +   /* Convert int to float */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +/** + * Emit floor.   
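
emit_TRUNC() above leans on the fact that a float -> signed int -> float round trip rounds toward zero, and emit_FLR() below reuses the same round trip after nudging negative inputs down by 1.0 so the truncation lands on the lower integer. A plain C illustration:

#include <stdio.h>

/* Truncation toward zero via an int round trip, as TRUNC is emitted with
 * cflts/csflt; FLR additionally subtracts 1.0 from negative inputs first.
 */
static float trunc_via_int(float x)
{
   return (float)(int)x;             /* C float->int conversion truncates */
}

int main(void)
{
   printf("%g %g\n", trunc_via_int(2.7f), trunc_via_int(-2.7f));   /* 2 -2 */
   printf("%g\n", trunc_via_int(-2.7f - 1.0f));                    /* -3 == floor(-2.7) */
   return 0;
}
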
+ * If negative int subtract one + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; + +   zero_reg = get_itemp(gen); +   spe_zero(gen->f, zero_reg); +   one_reg = get_const_one_reg(gen); +    +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      tmp_reg[ch] = get_itemp(gen); +   } + +   /* If negative, subtract 1.0 */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); +   } + +   /* Convert float to int */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); +   } + +   /* Convert int to float */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +/** + * Compute frac = Input - FLR(Input) + */ +static boolean +emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; + +   zero_reg = get_itemp(gen); +   spe_zero(gen->f, zero_reg); +   one_reg = get_const_one_reg(gen); + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      tmp_reg[ch] = get_itemp(gen); +   } + +   /* If negative, subtract 1.0 */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); +   } + +   /* Convert float to int */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); +   } + +   /* Convert int to float */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); +   } + +   /* d = s1 - FLR(s1) */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); +   } + +   /* store result */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +#if 0 +static void +print_functions(struct cell_context *cell) +{ +   struct cell_spu_function_info *funcs = &cell->spu_functions; +   uint i; +   for (i = 0; i < funcs->num; i++) { +      printf("SPU func %u: %s at %u\n", +             i, funcs->names[i], funcs->addrs[i]); +   } +} +#endif + + +static uint +lookup_function(struct cell_context *cell, const char *funcname) +{ +   const struct cell_spu_function_info *funcs = &cell->spu_functions; +   uint i, addr = 0; +   for (i = 0; i < funcs->num; i++) { +      if (strcmp(funcs->names[i], funcname) == 0) { +         addr = funcs->addrs[i]; +      } +   } +   assert(addr && "spu function not found"); +  
 return addr / 4;  /* discard 2 least significant bits */ +} + + +/** + * Emit code to call a SPU function. + * Used to implement instructions like SIN/COS/POW/TEX/etc. + * If scalar, only the X components of the src regs are used, and the + * result is replicated across the dest register's XYZW components. + */ +static boolean +emit_function_call(struct codegen *gen, +                   const struct tgsi_full_instruction *inst, +                   char *funcname, uint num_args, boolean scalar) +{ +   const uint addr = lookup_function(gen->cell, funcname); +   char comment[100]; +   int s_regs[3]; +   int func_called = FALSE; +   uint a, ch; +   int retval_reg = -1; + +   assert(num_args <= 3); + +   snprintf(comment, sizeof(comment), "CALL %s:", funcname); +   spe_comment(gen->f, -4, comment); + +   if (scalar) { +      for (a = 0; a < num_args; a++) { +         s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]); +      } +      /* we'll call the function, put the return value in this register, +       * then replicate it across all write-enabled components in d_reg. +       */ +      retval_reg = spe_allocate_available_register(gen->f); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      int d_reg; +      ubyte usedRegs[SPE_NUM_REGS]; +      uint i, numUsed; + +      if (!scalar) { +         for (a = 0; a < num_args; a++) { +            s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); +         } +      } + +      d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + +      if (!scalar || !func_called) { +         /* for a scalar function, we'll really only call the function once */ + +         numUsed = spe_get_registers_used(gen->f, usedRegs); +         assert(numUsed < gen->frame_size / 16 - 2); + +         /* save registers to stack */ +         for (i = 0; i < numUsed; i++) { +            uint reg = usedRegs[i]; +            int offset = 2 + i; +            spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); +         } + +         /* setup function arguments */ +         for (a = 0; a < num_args; a++) { +            spe_move(gen->f, 3 + a, s_regs[a]); +         } + +         /* branch to function, save return addr */ +         spe_brasl(gen->f, SPE_REG_RA, addr); + +         /* save function's return value */ +         if (scalar) +            spe_move(gen->f, retval_reg, 3); +         else +            spe_move(gen->f, d_reg, 3); + +         /* restore registers from stack */ +         for (i = 0; i < numUsed; i++) { +            uint reg = usedRegs[i]; +            if (reg != d_reg && reg != retval_reg) { +               int offset = 2 + i; +               spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); +            } +         } + +         func_called = TRUE; +      } + +      if (scalar) { +         spe_move(gen->f, d_reg, retval_reg); +      } + +      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); +      free_itemps(gen); +   } + +   if (scalar) { +      spe_release_register(gen->f, retval_reg); +   } + +   return TRUE; +} + + +static boolean +emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   const uint target = inst->InstructionExtTexture.Texture; +   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; +   uint addr; +   int ch; +   int coord_regs[4], d_regs[4]; + +   switch (target) { +   case TGSI_TEXTURE_1D: +   case TGSI_TEXTURE_2D: +      addr = lookup_function(gen->cell, "spu_tex_2d"); +      break; +   case TGSI_TEXTURE_3D: +      addr = lookup_function(gen->cell, "spu_tex_3d"); +    
  break; +   case TGSI_TEXTURE_CUBE: +      addr = lookup_function(gen->cell, "spu_tex_cube"); +      break; +   default: +      ASSERT(0 && "unsupported texture target"); +      return FALSE; +   } + +   assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + +   spe_comment(gen->f, -4, "CALL tex:"); + +   /* get src/dst reg info */ +   for (ch = 0; ch < 4; ch++) { +      coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +   } + +   { +      ubyte usedRegs[SPE_NUM_REGS]; +      uint i, numUsed; + +      numUsed = spe_get_registers_used(gen->f, usedRegs); +      assert(numUsed < gen->frame_size / 16 - 2); + +      /* save registers to stack */ +      for (i = 0; i < numUsed; i++) { +         uint reg = usedRegs[i]; +         int offset = 2 + i; +         spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); +      } + +      /* setup function arguments (XXX depends on target) */ +      for (i = 0; i < 4; i++) { +         spe_move(gen->f, 3 + i, coord_regs[i]); +      } +      spe_load_uint(gen->f, 7, unit); /* sampler unit */ + +      /* branch to function, save return addr */ +      spe_brasl(gen->f, SPE_REG_RA, addr); + +      /* save function's return values (four pixel's colors) */ +      for (i = 0; i < 4; i++) { +         spe_move(gen->f, d_regs[i], 3 + i); +      } + +      /* restore registers from stack */ +      for (i = 0; i < numUsed; i++) { +         uint reg = usedRegs[i]; +         if (reg != d_regs[0] && +             reg != d_regs[1] && +             reg != d_regs[2] && +             reg != d_regs[3]) { +            int offset = 2 + i; +            spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); +         } +      } +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); +      free_itemps(gen); +   } + +   return TRUE; +} + + +/** + * KILL if any of src reg values are less than zero. + */ +static boolean +emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch; +   int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; + +   spe_comment(gen->f, -4, "CALL kil:"); + +   /* zero = {0,0,0,0} */ +   zero_reg = get_itemp(gen); +   spe_zero(gen->f, zero_reg); + +   cmp_reg = get_itemp(gen); + +   /* get src regs */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +   } + +   /* test if any src regs are < 0 */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      if (kil_reg >= 0) { +         /* cmp = 0 > src ? : ~0 : 0 */ +         spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); +         /* kil = kil | cmp */ +         spe_or(gen->f, kil_reg, kil_reg, cmp_reg); +      } +      else { +         kil_reg = get_itemp(gen); +         /* kil = 0 > src ? : ~0 : 0 */ +         spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); +      } +   } + +   if (gen->if_nesting || gen->loop_nesting) { +      /* may have been a conditional kil */ +      spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); +   } + +   /* allocate the kill mask reg if needed */ +   if (gen->kill_mask_reg <= 0) { +      gen->kill_mask_reg = spe_allocate_available_register(gen->f); +      spe_move(gen->f, gen->kill_mask_reg, kil_reg); +   } +   else { +      spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); +   } + +   free_itemps(gen); + +   return TRUE; +} + + + +/** + * Emit min or max. 
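
Stepping back to emit_KIL() above: it builds its mask by OR-ing together a "component < 0" compare result for every enabled channel, then folds that into the shader's running kill mask, which the epilogue later returns. The per-pixel logic in scalar C (one pixel of the quad, illustrative names):

#include <stdint.h>
#include <stdio.h>

/* Per-pixel KIL logic: OR together a "component < 0" mask for every enabled
 * channel; a nonzero result means the fragment is killed.
 */
static uint32_t kill_mask(const float src[4], unsigned writemask)
{
   uint32_t kil = 0;
   int ch;

   for (ch = 0; ch < 4; ch++) {
      if (writemask & (1u << ch))
         kil |= (0.0f > src[ch]) ? 0xffffffffu : 0u;   /* fcgt-style compare */
   }
   return kil;
}

int main(void)
{
   const float a[4] = { 0.5f, -0.1f, 1.0f, 1.0f };   /* one negative component */
   const float b[4] = { 0.5f,  0.1f, 1.0f, 1.0f };   /* all non-negative */

   printf("0x%08x 0x%08x\n",
          (unsigned) kill_mask(a, 0xf),
          (unsigned) kill_mask(b, 0xf));
   return 0;
}
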
+ */ +static boolean +emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); +      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); +      tmp_reg[ch] = get_itemp(gen);          +   } + +   /* d = (s0 > s1) ? s0 : s1 */ +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) +         spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); +      else +         spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); +   } +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); +   } + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); +   } + +   free_itemps(gen); +   return TRUE; +} + + +/** + * Emit code to update the execution mask. + * This needs to be done whenever the execution status of a conditional + * or loop is changed. + */ +static void +emit_update_exec_mask(struct codegen *gen) +{ +   const int exec_reg = get_exec_mask_reg(gen); +   const int cond_reg = gen->cond_mask_reg; +   const int loop_reg = gen->loop_mask_reg; + +   spe_comment(gen->f, 0, "Update master execution mask"); + +   if (gen->if_nesting > 0 && gen->loop_nesting > 0) { +      /* exec_mask = cond_mask & loop_mask */ +      assert(cond_reg > 0); +      assert(loop_reg > 0); +      spe_and(gen->f, exec_reg, cond_reg, loop_reg); +   } +   else if (gen->if_nesting > 0) { +      assert(cond_reg > 0); +      spe_move(gen->f, exec_reg, cond_reg); +   } +   else if (gen->loop_nesting > 0) { +      assert(loop_reg > 0); +      spe_move(gen->f, exec_reg, loop_reg); +   } +   else { +      spe_load_int(gen->f, exec_reg, ~0x0); +   } +} + + +static boolean +emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   const int channel = 0; +   int cond_reg; + +   cond_reg = get_cond_mask_reg(gen); + +   /* XXX push cond exec mask */ + +   spe_comment(gen->f,  0, "init conditional exec mask = ~0:"); +   spe_load_int(gen->f, cond_reg, ~0); + +   /* update conditional execution mask with the predicate register */ +   int tmp_reg = get_itemp(gen); +   int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + +   /* tmp = (s1_reg == 0) */ +   spe_ceqi(gen->f, tmp_reg, s1_reg, 0); +   /* tmp = !tmp */ +   spe_complement(gen->f, tmp_reg, tmp_reg); +   /* cond_mask = cond_mask & tmp */ +   spe_and(gen->f, cond_reg, cond_reg, tmp_reg); + +   gen->if_nesting++; + +   /* update the master execution mask */ +   emit_update_exec_mask(gen); + +   free_itemps(gen); + +   return TRUE; +} + + +static boolean +emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   const int cond_reg = get_cond_mask_reg(gen); + +   spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); +   spe_complement(gen->f, cond_reg, cond_reg); +   emit_update_exec_mask(gen); + +   return TRUE; +} + + +static boolean +emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   /* XXX todo: pop cond exec mask */ + +   gen->if_nesting--; + +   emit_update_exec_mask(gen); + +   return TRUE; +} + + +static boolean +emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   int exec_reg, loop_reg; + +   exec_reg = get_exec_mask_reg(gen); +   
loop_reg = get_loop_mask_reg(gen); + +   /* XXX push loop_exec mask */ + +   spe_comment(gen->f,  0*-4, "initialize loop exec mask = ~0"); +   spe_load_int(gen->f, loop_reg, ~0x0); + +   gen->loop_nesting++; +   gen->loop_start = spe_code_size(gen->f);  /* in bytes */ + +   return TRUE; +} + + +static boolean +emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   const int loop_reg = get_loop_mask_reg(gen); +   const int tmp_reg = get_itemp(gen); +   int offset; + +   /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ +   spe_orx(gen->f, tmp_reg, loop_reg); + +   offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ + +   /* branch back to top of loop if tmp_reg != 0 */ +   spe_brnz(gen->f, tmp_reg, offset / 4); + +   /* XXX pop loop_exec mask */ + +   gen->loop_nesting--; + +   emit_update_exec_mask(gen); + +   return TRUE; +} + + +static boolean +emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   const int exec_reg = get_exec_mask_reg(gen); +   const int loop_reg = get_loop_mask_reg(gen); + +   assert(gen->loop_nesting > 0); + +   spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask"); +   spe_andc(gen->f, loop_reg, loop_reg, exec_reg); + +   emit_update_exec_mask(gen); + +   return TRUE; +} + + +static boolean +emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ +   assert(gen->loop_nesting > 0); + +   return TRUE; +} + + +static boolean +emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, +             boolean ddx) +{ +   int ch; + +   FOR_EACH_ENABLED_CHANNEL(inst, ch) { +      int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); +      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + +      int t1_reg = get_itemp(gen); +      int t2_reg = get_itemp(gen); + +      spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ +      if (ddx) { +         spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ +      } +      else { +         spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ +      } +      spe_fs(gen->f, d_reg, t2_reg, t1_reg); + +      free_itemps(gen); +   } + +   return TRUE; +} + + + + +/** + * Emit END instruction. + * We just return from the shader function at this point. + * + * Note that there may be more code after this that would be + * called by TGSI_OPCODE_CALL. + */ +static boolean +emit_END(struct codegen *gen) +{ +   emit_epilogue(gen); +   return TRUE; +} + + +/** + * Emit code for the given instruction.  Just a big switch stmt. 
+ */ +static boolean +emit_instruction(struct codegen *gen, +                 const struct tgsi_full_instruction *inst) +{ +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_ARL: +      return emit_ARL(gen, inst); +   case TGSI_OPCODE_MOV: +   case TGSI_OPCODE_SWZ: +      return emit_MOV(gen, inst); +   case TGSI_OPCODE_ADD: +   case TGSI_OPCODE_SUB: +   case TGSI_OPCODE_MUL: +      return emit_binop(gen, inst); +   case TGSI_OPCODE_MAD: +      return emit_MAD(gen, inst); +   case TGSI_OPCODE_LERP: +      return emit_LERP(gen, inst); +   case TGSI_OPCODE_DP3: +      return emit_DP3(gen, inst); +   case TGSI_OPCODE_DP4: +      return emit_DP4(gen, inst); +   case TGSI_OPCODE_DPH: +      return emit_DPH(gen, inst); +   case TGSI_OPCODE_NRM: +      return emit_NRM3(gen, inst); +   case TGSI_OPCODE_XPD: +      return emit_XPD(gen, inst); +   case TGSI_OPCODE_RCP: +   case TGSI_OPCODE_RSQ: +      return emit_RCP_RSQ(gen, inst); +   case TGSI_OPCODE_ABS: +      return emit_ABS(gen, inst); +   case TGSI_OPCODE_SGT: +   case TGSI_OPCODE_SLT: +   case TGSI_OPCODE_SGE: +   case TGSI_OPCODE_SLE: +   case TGSI_OPCODE_SEQ: +   case TGSI_OPCODE_SNE: +      return emit_inequality(gen, inst); +   case TGSI_OPCODE_CMP: +      return emit_CMP(gen, inst); +   case TGSI_OPCODE_MIN: +   case TGSI_OPCODE_MAX: +      return emit_MIN_MAX(gen, inst); +   case TGSI_OPCODE_TRUNC: +      return emit_TRUNC(gen, inst); +   case TGSI_OPCODE_FLR: +      return emit_FLR(gen, inst); +   case TGSI_OPCODE_FRC: +      return emit_FRC(gen, inst); +   case TGSI_OPCODE_END: +      return emit_END(gen); + +   case TGSI_OPCODE_COS: +      return emit_function_call(gen, inst, "spu_cos", 1, TRUE); +   case TGSI_OPCODE_SIN: +      return emit_function_call(gen, inst, "spu_sin", 1, TRUE); +   case TGSI_OPCODE_POW: +      return emit_function_call(gen, inst, "spu_pow", 2, TRUE); +   case TGSI_OPCODE_EXPBASE2: +      return emit_function_call(gen, inst, "spu_exp2", 1, TRUE); +   case TGSI_OPCODE_LOGBASE2: +      return emit_function_call(gen, inst, "spu_log2", 1, TRUE); +   case TGSI_OPCODE_TEX: +      /* fall-through for now */ +   case TGSI_OPCODE_TXD: +      /* fall-through for now */ +   case TGSI_OPCODE_TXB: +      /* fall-through for now */ +   case TGSI_OPCODE_TXL: +      /* fall-through for now */ +   case TGSI_OPCODE_TXP: +      return emit_TEX(gen, inst); +   case TGSI_OPCODE_KIL: +      return emit_KIL(gen, inst); + +   case TGSI_OPCODE_IF: +      return emit_IF(gen, inst); +   case TGSI_OPCODE_ELSE: +      return emit_ELSE(gen, inst); +   case TGSI_OPCODE_ENDIF: +      return emit_ENDIF(gen, inst); + +   case TGSI_OPCODE_BGNLOOP2: +      return emit_BGNLOOP(gen, inst); +   case TGSI_OPCODE_ENDLOOP2: +      return emit_ENDLOOP(gen, inst); +   case TGSI_OPCODE_BRK: +      return emit_BRK(gen, inst); +   case TGSI_OPCODE_CONT: +      return emit_CONT(gen, inst); + +   case TGSI_OPCODE_DDX: +      return emit_DDX_DDY(gen, inst, TRUE); +   case TGSI_OPCODE_DDY: +      return emit_DDX_DDY(gen, inst, FALSE); + +   /* XXX lots more cases to do... */ + +   default: +      fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", +              inst->Instruction.Opcode); +      return FALSE; +   } + +   return TRUE; +} + + + +/** + * Emit code for a TGSI immediate value (vector of four floats). + * This involves register allocation and initialization. + * XXX the initialization should be done by a "prepare" stage, not + * per quad execution! 
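
emit_immediate() below allocates one SPE register per component of the immediate but shares the previous component's register whenever the value repeats, so a vector like {0.5, 0.5, 0.5, 1.0} costs two loads instead of four. A small sketch of that map-building step, with the register allocator reduced to a counter and the names made up for illustration:

#include <stdio.h>

/* Build the per-channel register map for one immediate, sharing the previous
 * channel's register when the value repeats.
 */
static int alloc_reg(void)
{
   static int next_reg = 10;
   return next_reg++;
}

static void map_immediate(const float val[4], int reg[4])
{
   int ch;
   for (ch = 0; ch < 4; ch++) {
      if (ch > 0 && val[ch] == val[ch - 1])
         reg[ch] = reg[ch - 1];       /* same value: reuse, no extra load */
      else
         reg[ch] = alloc_reg();       /* new value: new register + load   */
   }
}

int main(void)
{
   const float imm[4] = { 0.5f, 0.5f, 0.5f, 1.0f };
   int reg[4];

   map_immediate(imm, reg);
   printf("$%d $%d $%d $%d\n", reg[0], reg[1], reg[2], reg[3]);   /* $10 $10 $10 $11 */
   return 0;
}
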
+ */ +static boolean +emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) +{ +   int ch; + +   assert(gen->num_imm < MAX_TEMPS); + +   for (ch = 0; ch < 4; ch++) { +      float val = immed->u.ImmediateFloat32[ch].Float; + +      if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) { +         /* re-use previous register */ +         gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; +      } +      else { +         char str[100]; +         int reg = spe_allocate_available_register(gen->f); + +         if (reg < 0) +            return FALSE; + +         sprintf(str, "init $%d = %f", reg, val); +         spe_comment(gen->f, 0, str); + +         /* update immediate map */ +         gen->imm_regs[gen->num_imm][ch] = reg; + +         /* emit initializer instruction */ +         spe_load_float(gen->f, reg, val); +      } +   } + +   gen->num_imm++; + +   return TRUE; +} + + + +/** + * Emit "code" for a TGSI declaration. + * We only care about TGSI TEMPORARY register declarations at this time. + * For each TGSI TEMPORARY we allocate four SPE registers. + */ +static boolean +emit_declaration(struct cell_context *cell, +                 struct codegen *gen, const struct tgsi_full_declaration *decl) +{ +   int i, ch; + +   switch (decl->Declaration.File) { +   case TGSI_FILE_TEMPORARY: +      for (i = decl->DeclarationRange.First; +           i <= decl->DeclarationRange.Last; +           i++) { +         assert(i < MAX_TEMPS); +         for (ch = 0; ch < 4; ch++) { +            gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); +            if (gen->temp_regs[i][ch] < 0) +               return FALSE; /* out of regs */ +         } + +         /* XXX if we run out of SPE registers, we need to spill +          * to SPU memory.  someday... +          */ + +         { +            char buf[100]; +            sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, +                    gen->temp_regs[i][0], gen->temp_regs[i][1], +                    gen->temp_regs[i][2], gen->temp_regs[i][3]); +            spe_comment(gen->f, 0, buf); +         } +      } +      break; +   default: +      ; /* ignore */ +   } + +   return TRUE; +} + + + +/** + * Translate TGSI shader code to SPE instructions.  This is done when + * the state tracker gives us a new shader (via pipe->create_fs_state()). + * + * \param cell    the rendering context (in) + * \param tokens  the TGSI shader (in) + * \param f       the generated function (out) + */ +boolean +cell_gen_fragment_program(struct cell_context *cell, +                          const struct tgsi_token *tokens, +                          struct spe_function *f) +{ +   struct tgsi_parse_context parse; +   struct codegen gen; +   uint ic = 0; + +   memset(&gen, 0, sizeof(gen)); +   gen.cell = cell; +   gen.f = f; + +   /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
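+    * Here the generated code receives three pointer arguments: the
+    * inputs, outputs and constants arrays assigned below.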
*/ +   gen.inputs_reg = 3;     /* pointer to inputs array */ +   gen.outputs_reg = 4;    /* pointer to outputs array */ +   gen.constants_reg = 5;  /* pointer to constants array */ + +   spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); +   spe_allocate_register(f, gen.inputs_reg); +   spe_allocate_register(f, gen.outputs_reg); +   spe_allocate_register(f, gen.constants_reg); + +   if (cell->debug_flags & CELL_DEBUG_ASM) { +      spe_print_code(f, TRUE); +      spe_indent(f, 2*8); +      printf("Begin %s\n", __FUNCTION__); +      tgsi_dump(tokens, 0); +   } + +   tgsi_parse_init(&parse, tokens); + +   emit_prologue(&gen); + +   while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { +      tgsi_parse_token(&parse); + +      switch (parse.FullToken.Token.Type) { +      case TGSI_TOKEN_TYPE_IMMEDIATE: +         if (f->print) { +            _debug_printf("    # "); +            tgsi_dump_immediate(&parse.FullToken.FullImmediate); +         } +         if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) +            gen.error = TRUE; +         break; + +      case TGSI_TOKEN_TYPE_DECLARATION: +         if (f->print) { +            _debug_printf("    # "); +            tgsi_dump_declaration(&parse.FullToken.FullDeclaration); +         } +         if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) +            gen.error = TRUE; +         break; + +      case TGSI_TOKEN_TYPE_INSTRUCTION: +         if (f->print) { +            _debug_printf("    # "); +            ic++; +            tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); +         } +         if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) +            gen.error = TRUE; +         break; + +      default: +         assert(0); +      } +   } + +   if (gen.error) { +      /* terminate the SPE code */ +      return emit_END(&gen); +   } + +   if (cell->debug_flags & CELL_DEBUG_ASM) { +      printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); +      printf("End %s\n", __FUNCTION__); +   } + +   tgsi_parse_free( &parse ); + +   return !gen.error; +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h new file mode 100644 index 0000000000..99faea7046 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.h @@ -0,0 +1,42 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + + +#ifndef CELL_GEN_FP_H +#define CELL_GEN_FP_H + + + +extern boolean +cell_gen_fragment_program(struct cell_context *cell, +                          const struct tgsi_token *tokens, +                          struct spe_function *f); + + +#endif /* CELL_GEN_FP_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c new file mode 100644 index 0000000000..66d4b3b6a3 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -0,0 +1,2181 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2009 VMware, Inc.  All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * Generate SPU per-fragment code (actually per-quad code). + * \author Brian Paul + * \author Bob Ellison + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" + + + +/** Do extra optimizations? */ +#define OPTIMIZATIONS 1 + + +/** + * Generate SPE code to perform Z/depth testing. + * + * \param dsa         Gallium depth/stencil/alpha state to gen code for + * \param f           SPE function to append instruction onto. + * \param mask_reg    register containing quad/pixel "alive" mask (in/out) + * \param ifragZ_reg  register containing integer fragment Z values (in) + * \param ifbZ_reg    register containing integer frame buffer Z values (in/out) + * \param zmask_reg   register containing result of Z test/comparison (out) + * + * Returns TRUE if the Z-buffer needs to be updated. + */ +static boolean +gen_depth_test(struct spe_function *f, +               const struct pipe_depth_stencil_alpha_state *dsa, +               int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) +{ +   /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ +    * quantities.  
This only makes a difference for 32-bit Z values though. +    */ +   ASSERT(dsa->depth.enabled); + +   switch (dsa->depth.func) { +   case PIPE_FUNC_EQUAL: +      /* zmask = (ifragZ == ref) */ +      spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); +      /* mask = (mask & zmask) */ +      spe_and(f, mask_reg, mask_reg, zmask_reg); +      break; + +   case PIPE_FUNC_NOTEQUAL: +      /* zmask = (ifragZ == ref) */ +      spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); +      /* mask = (mask & ~zmask) */ +      spe_andc(f, mask_reg, mask_reg, zmask_reg); +      break; + +   case PIPE_FUNC_GREATER: +      /* zmask = (ifragZ > ref) */ +      spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); +      /* mask = (mask & zmask) */ +      spe_and(f, mask_reg, mask_reg, zmask_reg); +      break; + +   case PIPE_FUNC_LESS: +      /* zmask = (ref > ifragZ) */ +      spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); +      /* mask = (mask & zmask) */ +      spe_and(f, mask_reg, mask_reg, zmask_reg); +      break; + +   case PIPE_FUNC_LEQUAL: +      /* zmask = (ifragZ > ref) */ +      spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); +      /* mask = (mask & ~zmask) */ +      spe_andc(f, mask_reg, mask_reg, zmask_reg); +      break; + +   case PIPE_FUNC_GEQUAL: +      /* zmask = (ref > ifragZ) */ +      spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); +      /* mask = (mask & ~zmask) */ +      spe_andc(f, mask_reg, mask_reg, zmask_reg); +      break; + +   case PIPE_FUNC_NEVER: +      spe_il(f, mask_reg, 0);  /* mask = {0,0,0,0} */ +      spe_move(f, zmask_reg, mask_reg);  /* zmask = mask */ +      break; + +   case PIPE_FUNC_ALWAYS: +      /* mask unchanged */ +      spe_il(f, zmask_reg, ~0);  /* zmask = {~0,~0,~0,~0} */ +      break; + +   default: +      ASSERT(0); +      break; +   } + +   if (dsa->depth.writemask) { +      /* +       * If (ztest passed) { +       *    framebufferZ = fragmentZ; +       * } +       * OR, +       * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; +       */ +      spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); +      return TRUE; +   } + +   return FALSE; +} + + +/** + * Generate SPE code to perform alpha testing. + * + * \param dsa        Gallium depth/stencil/alpha state to gen code for + * \param f          SPE function to append instruction onto. 
+ * \param mask_reg   register containing quad/pixel "alive" mask (in/out) + * \param fragA_reg  register containing four fragment alpha values (in) + */ +static void +gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, +               struct spe_function *f, int mask_reg, int fragA_reg) +{ +   int ref_reg = spe_allocate_available_register(f); +   int amask_reg = spe_allocate_available_register(f); + +   ASSERT(dsa->alpha.enabled); + +   if ((dsa->alpha.func != PIPE_FUNC_NEVER) && +       (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { +      /* load/splat the alpha reference float value */ +      spe_load_float(f, ref_reg, dsa->alpha.ref_value); +   } + +   /* emit code to do the alpha comparison, updating 'mask' */ +   switch (dsa->alpha.func) { +   case PIPE_FUNC_EQUAL: +      /* amask = (fragA == ref) */ +      spe_fceq(f, amask_reg, fragA_reg, ref_reg); +      /* mask = (mask & amask) */ +      spe_and(f, mask_reg, mask_reg, amask_reg); +      break; + +   case PIPE_FUNC_NOTEQUAL: +      /* amask = (fragA == ref) */ +      spe_fceq(f, amask_reg, fragA_reg, ref_reg); +      /* mask = (mask & ~amask) */ +      spe_andc(f, mask_reg, mask_reg, amask_reg); +      break; + +   case PIPE_FUNC_GREATER: +      /* amask = (fragA > ref) */ +      spe_fcgt(f, amask_reg, fragA_reg, ref_reg); +      /* mask = (mask & amask) */ +      spe_and(f, mask_reg, mask_reg, amask_reg); +      break; + +   case PIPE_FUNC_LESS: +      /* amask = (ref > fragA) */ +      spe_fcgt(f, amask_reg, ref_reg, fragA_reg); +      /* mask = (mask & amask) */ +      spe_and(f, mask_reg, mask_reg, amask_reg); +      break; + +   case PIPE_FUNC_LEQUAL: +      /* amask = (fragA > ref) */ +      spe_fcgt(f, amask_reg, fragA_reg, ref_reg); +      /* mask = (mask & ~amask) */ +      spe_andc(f, mask_reg, mask_reg, amask_reg); +      break; + +   case PIPE_FUNC_GEQUAL: +      /* amask = (ref > fragA) */ +      spe_fcgt(f, amask_reg, ref_reg, fragA_reg); +      /* mask = (mask & ~amask) */ +      spe_andc(f, mask_reg, mask_reg, amask_reg); +      break; + +   case PIPE_FUNC_NEVER: +      spe_il(f, mask_reg, 0);  /* mask = [0,0,0,0] */ +      break; + +   case PIPE_FUNC_ALWAYS: +      /* no-op, mask unchanged */ +      break; + +   default: +      ASSERT(0); +      break; +   } + +#if OPTIMIZATIONS +   /* if mask == {0,0,0,0} we're all done, return */ +   { +      /* re-use amask reg here */ +      int tmp_reg = amask_reg; +      /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ +      spe_orx(f, tmp_reg, mask_reg); +      /* if tmp[0] == 0 then return from function call */ +      spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); +   } +#endif + +   spe_release_register(f, ref_reg); +   spe_release_register(f, amask_reg); +} + + +/** + * This pair of functions is used inline to allocate and deallocate + * optional constant registers.  Once a constant is discovered to be  + * needed, we will likely need it again, so we don't want to deallocate + * it and have to allocate and load it again unnecessarily. 
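+ *
+ * A minimal usage sketch (the 0.5f value is only illustrative):
+ *
+ *    int constA_reg = -1;                         <-- not yet allocated
+ *    setup_const_register(f, &constA_reg, 0.5f);  <-- allocates and loads
+ *    setup_const_register(f, &constA_reg, 0.5f);  <-- no-op, already set up
+ *    release_const_register(f, constA_reg);       <-- no-op if still -1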
+ */ +static INLINE void +setup_optional_register(struct spe_function *f, +                        int *r) +{ +   if (*r < 0) +      *r = spe_allocate_available_register(f); +} + +static INLINE void +release_optional_register(struct spe_function *f, +                          int r) +{ +   if (r >= 0) +      spe_release_register(f, r); +} + +static INLINE void +setup_const_register(struct spe_function *f, +                     int *r, +                     float value) +{ +   if (*r >= 0) +      return; +   setup_optional_register(f, r); +   spe_load_float(f, *r, value); +} + +static INLINE void +release_const_register(struct spe_function *f, +                       int r) +{ +   release_optional_register(f, r); +} + + + +/** + * Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. + */ +static void +unpack_colors(struct spe_function *f, +              enum pipe_format color_format, +              int fbRGBA_reg, +              int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg) +{ +   int mask0_reg = spe_allocate_available_register(f); +   int mask1_reg = spe_allocate_available_register(f); +   int mask2_reg = spe_allocate_available_register(f); +   int mask3_reg = spe_allocate_available_register(f); + +   spe_load_int(f, mask0_reg, 0xff); +   spe_load_int(f, mask1_reg, 0xff00); +   spe_load_int(f, mask2_reg, 0xff0000); +   spe_load_int(f, mask3_reg, 0xff000000); + +   spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); + +   switch (color_format) { +   case PIPE_FORMAT_A8R8G8B8_UNORM: +      /* fbB = fbRGBA & mask */ +      spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); + +      /* fbG = fbRGBA & mask */ +      spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg); + +      /* fbR = fbRGBA & mask */ +      spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg); + +      /* fbA = fbRGBA & mask */ +      spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg); + +      /* fbG = fbG >> 8 */ +      spe_roti(f, fbG_reg, fbG_reg, -8); + +      /* fbR = fbR >> 16 */ +      spe_roti(f, fbR_reg, fbR_reg, -16); + +      /* fbA = fbA >> 24 */ +      spe_roti(f, fbA_reg, fbA_reg, -24); +      break; + +   case PIPE_FORMAT_B8G8R8A8_UNORM: +      /* fbA = fbRGBA & mask */ +      spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); + +      /* fbR = fbRGBA & mask */ +      spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg); + +      /* fbG = fbRGBA & mask */ +      spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg); + +      /* fbB = fbRGBA & mask */ +      spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg); + +      /* fbR = fbR >> 8 */ +      spe_roti(f, fbR_reg, fbR_reg, -8); + +      /* fbG = fbG >> 16 */ +      spe_roti(f, fbG_reg, fbG_reg, -16); + +      /* fbB = fbB >> 24 */ +      spe_roti(f, fbB_reg, fbB_reg, -24); +      break; + +   default: +      ASSERT(0); +   } + +   /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ +   spe_cuflt(f, fbR_reg, fbR_reg, 8); +   spe_cuflt(f, fbG_reg, fbG_reg, 8); +   spe_cuflt(f, fbB_reg, fbB_reg, 8); +   spe_cuflt(f, fbA_reg, fbA_reg, 8); + +   spe_release_register(f, mask0_reg); +   spe_release_register(f, mask1_reg); +   spe_release_register(f, mask2_reg); +   spe_release_register(f, mask3_reg); +} + + +/** + * Generate SPE code to implement the given blend mode for a quad of pixels. + * \param f          SPE function to append instruction onto. 
+ * \param fragR_reg  register with fragment red values (float) (in/out) + * \param fragG_reg  register with fragment green values (float) (in/out) + * \param fragB_reg  register with fragment blue values (float) (in/out) + * \param fragA_reg  register with fragment alpha values (float) (in/out) + * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) + */ +static void +gen_blend(const struct pipe_blend_state *blend, +          const struct pipe_blend_color *blend_color, +          struct spe_function *f, +          enum pipe_format color_format, +          int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, +          int fbRGBA_reg) +{ +   int term1R_reg = spe_allocate_available_register(f); +   int term1G_reg = spe_allocate_available_register(f); +   int term1B_reg = spe_allocate_available_register(f); +   int term1A_reg = spe_allocate_available_register(f); + +   int term2R_reg = spe_allocate_available_register(f); +   int term2G_reg = spe_allocate_available_register(f); +   int term2B_reg = spe_allocate_available_register(f); +   int term2A_reg = spe_allocate_available_register(f); + +   int fbR_reg = spe_allocate_available_register(f); +   int fbG_reg = spe_allocate_available_register(f); +   int fbB_reg = spe_allocate_available_register(f); +   int fbA_reg = spe_allocate_available_register(f); + +   int tmp_reg = spe_allocate_available_register(f); + +   /* Optional constant registers we might or might not end up using; +    * if we do use them, make sure we only allocate them once by +    * keeping a flag on each one. +    */ +   int one_reg = -1; +   int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; + +   ASSERT(blend->blend_enable); + +   /* packed RGBA -> float colors */ +   unpack_colors(f, color_format, fbRGBA_reg, +                 fbR_reg, fbG_reg, fbB_reg, fbA_reg); + +   /* +    * Compute Src RGB terms.  We're actually looking for the value +    * of (the appropriate RGB factors) * (the incoming source RGB color), +    * because in some cases (like PIPE_BLENDFACTOR_ONE and  +    * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. 
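+    *
+    * For example, PIPE_BLENDFACTOR_INV_SRC_ALPHA needs the term
+    * (R*(1-A), G*(1-A), B*(1-A)) = (R-R*A, G-G*A, B-B*A), which maps
+    * directly onto fnms (a = d - b*c), so "1-A" is never computed
+    * explicitly.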
+    */ +   switch (blend->rgb_src_factor) { +   case PIPE_BLENDFACTOR_ONE: +      /* factors = (1,1,1), so term = (R,G,B) */ +      spe_move(f, term1R_reg, fragR_reg); +      spe_move(f, term1G_reg, fragG_reg); +      spe_move(f, term1B_reg, fragB_reg); +      break; +   case PIPE_BLENDFACTOR_ZERO: +      /* factors = (0,0,0), so term = (0,0,0) */ +      spe_load_float(f, term1R_reg, 0.0f); +      spe_load_float(f, term1G_reg, 0.0f); +      spe_load_float(f, term1B_reg, 0.0f); +      break; +   case PIPE_BLENDFACTOR_SRC_COLOR: +      /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ +      spe_fm(f, term1R_reg, fragR_reg, fragR_reg); +      spe_fm(f, term1G_reg, fragG_reg, fragG_reg); +      spe_fm(f, term1B_reg, fragB_reg, fragB_reg); +      break; +   case PIPE_BLENDFACTOR_SRC_ALPHA: +      /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ +      spe_fm(f, term1R_reg, fragR_reg, fragA_reg); +      spe_fm(f, term1G_reg, fragG_reg, fragA_reg); +      spe_fm(f, term1B_reg, fragB_reg, fragA_reg); +      break; +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B))  +       * or in other words term = (R-R*R, G-G*G, B-B*B) +       * fnms(a,b,c,d) computes a = d - b*c +       */ +      spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); +      spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); +      spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); +      break; +   case PIPE_BLENDFACTOR_DST_COLOR: +      /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ +      spe_fm(f, term1R_reg, fragR_reg, fbR_reg); +      spe_fm(f, term1G_reg, fragG_reg, fbG_reg); +      spe_fm(f, term1B_reg, fragB_reg, fbB_reg); +      break; +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) +       * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) +       * fnms(a,b,c,d) computes a = d - b*c +       */ +      spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); +      spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); +      spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); +      break; +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +      /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) +       * or term = (R-R*A,G-G*A,B-B*A) +       * fnms(a,b,c,d) computes a = d - b*c +       */ +      spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); +      spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); +      spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); +      break; +   case PIPE_BLENDFACTOR_DST_ALPHA: +      /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ +      spe_fm(f, term1R_reg, fragR_reg, fbA_reg); +      spe_fm(f, term1G_reg, fragG_reg, fbA_reg); +      spe_fm(f, term1B_reg, fragB_reg, fbA_reg); +      break; +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: +      /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb))  +       * or term = (R-R*Afb,G-G*Afb,b-B*Afb) +       * fnms(a,b,c,d) computes a = d - b*c +       */ +      spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); +      spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); +      spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); +      break; +   case PIPE_BLENDFACTOR_CONST_COLOR: +      /* We need the optional constant color registers */ +      setup_const_register(f, &constR_reg, blend_color->color[0]); +      setup_const_register(f, &constG_reg, blend_color->color[1]); +      setup_const_register(f, 
&constB_reg, blend_color->color[2]);
+      /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
+      spe_fm(f, term1R_reg, fragR_reg, constR_reg);
+      spe_fm(f, term1G_reg, fragG_reg, constG_reg);
+      spe_fm(f, term1B_reg, fragB_reg, constB_reg);
+      break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      /* we'll need the optional constant alpha register */
+      setup_const_register(f, &constA_reg, blend_color->color[3]);
+      /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
+      spe_fm(f, term1R_reg, fragR_reg, constA_reg);
+      spe_fm(f, term1G_reg, fragG_reg, constA_reg);
+      spe_fm(f, term1B_reg, fragB_reg, constA_reg);
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      /* We need the optional constant color registers */
+      setup_const_register(f, &constR_reg, blend_color->color[0]);
+      setup_const_register(f, &constG_reg, blend_color->color[1]);
+      setup_const_register(f, &constB_reg, blend_color->color[2]);
+      /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc))
+       * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
+       * fnms(a,b,c,d) computes a = d - b*c
+       */
+      spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg);
+      spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg);
+      spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg);
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      /* We need the optional constant alpha register */
+      setup_const_register(f, &constA_reg, blend_color->color[3]);
+      /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
+       * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
+       * fnms(a,b,c,d) computes a = d - b*c
+       */
+      spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg);
+      spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg);
+      spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg);
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      /* We'll need the optional {1,1,1,1} register */
+      setup_const_register(f, &one_reg, 1.0f);
+      /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
+       * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
+       * We could expand the term (as a*min(b,c) == min(a*b,a*c)
+       * as long as a is positive), but then we'd have to do three
+       * spe_float_min() functions instead of one, so this is simpler.
+       */
+      /* tmp = 1 - Afb */
+      spe_fs(f, tmp_reg, one_reg, fbA_reg);
+      /* tmp = min(A,tmp) */
+      spe_float_min(f, tmp_reg, fragA_reg, tmp_reg);
+      /* term = R*tmp */
+      spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
+      spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
+      spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
+      break;
+
+      /* These are special D3D cases involving a second color output
+       * from the fragment shader.  I'm not sure we can support them
+       * yet... XXX
+       */
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
+   default:
+      ASSERT(0);
+   }
+
+   /*
+    * Compute Src Alpha term.  Like the above, we're looking for
+    * the full term A*factor, not just the factor itself, because
+    * in many cases we can avoid doing unnecessary multiplies.
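+    *
+    * Note that the "COLOR" factors reduce to the alpha component here;
+    * e.g. PIPE_BLENDFACTOR_SRC_COLOR and PIPE_BLENDFACTOR_SRC_ALPHA both
+    * end up as a factor of A.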
+    */ +   switch (blend->alpha_src_factor) { +   case PIPE_BLENDFACTOR_ZERO: +      /* factor = 0, so term = 0 */ +      spe_load_float(f, term1A_reg, 0.0f); +      break; + +   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ +   case PIPE_BLENDFACTOR_ONE: +      /* factor = 1, so term = A */ +      spe_move(f, term1A_reg, fragA_reg); +      break; + +   case PIPE_BLENDFACTOR_SRC_COLOR: +      /* factor = A, so term = A*A */ +      spe_fm(f, term1A_reg, fragA_reg, fragA_reg); +      break; +   case PIPE_BLENDFACTOR_SRC_ALPHA: +      spe_fm(f, term1A_reg, fragA_reg, fragA_reg); +      break; + +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      /* factor = 1-A, so term = A*(1-A) = A-A*A */ +      /* fnms(a,b,c,d) computes a = d - b*c */ +      spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); +      break; + +   case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_DST_COLOR: +      /* factor = Afb, so term = A*Afb */ +      spe_fm(f, term1A_reg, fragA_reg, fbA_reg); +      break; + +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */ +      /* fnms(a,b,c,d) computes a = d - b*c */ +      spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); +      break; + +   case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_CONST_COLOR: +      /* We need the optional constA_reg register */ +      setup_const_register(f, &constA_reg, blend_color->color[3]); +      /* factor = Ac, so term = A*Ac */ +      spe_fm(f, term1A_reg, fragA_reg, constA_reg); +      break; + +   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_INV_CONST_COLOR: +      /* We need the optional constA_reg register */ +      setup_const_register(f, &constA_reg, blend_color->color[3]); +      /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ +      /* fnms(a,b,c,d) computes a = d - b*c */ +      spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); +      break; + +      /* These are special D3D cases involving a second color output +       * from the fragment shader.  I'm not sure we can support them +       * yet... XXX +       */ +   case PIPE_BLENDFACTOR_SRC1_COLOR: +   case PIPE_BLENDFACTOR_SRC1_ALPHA: +   case PIPE_BLENDFACTOR_INV_SRC1_COLOR: +   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: +   default: +      ASSERT(0); +   } + +   /* +    * Compute Dest RGB term.  Like the above, we're looking for +    * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because +    * in many cases we can avoid doing unnecessary multiplies. 
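+    *
+    * This mirrors the source RGB switch above, but with the framebuffer
+    * colors (Rfb,Gfb,Bfb) as the values being scaled.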
+    */ +   switch (blend->rgb_dst_factor) { +   case PIPE_BLENDFACTOR_ONE: +      /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ +      spe_move(f, term2R_reg, fbR_reg); +      spe_move(f, term2G_reg, fbG_reg); +      spe_move(f, term2B_reg, fbB_reg); +      break; +   case PIPE_BLENDFACTOR_ZERO: +      /* factor s= (0,0,0), so term = (0,0,0) */ +      spe_load_float(f, term2R_reg, 0.0f); +      spe_load_float(f, term2G_reg, 0.0f); +      spe_load_float(f, term2B_reg, 0.0f); +      break; +   case PIPE_BLENDFACTOR_SRC_COLOR: +      /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ +      spe_fm(f, term2R_reg, fbR_reg, fragR_reg); +      spe_fm(f, term2G_reg, fbG_reg, fragG_reg); +      spe_fm(f, term2B_reg, fbB_reg, fragB_reg); +      break; +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B))  +       * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) +       * fnms(a,b,c,d) computes a = d - b*c +       */ +      spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); +      spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); +      spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); +      break; +   case PIPE_BLENDFACTOR_SRC_ALPHA: +      /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ +      spe_fm(f, term2R_reg, fbR_reg, fragA_reg); +      spe_fm(f, term2G_reg, fbG_reg, fragA_reg); +      spe_fm(f, term2B_reg, fbB_reg, fragA_reg); +      break; +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +      /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ +      /* fnms(a,b,c,d) computes a = d - b*c */ +      spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); +      spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); +      spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); +      break; +   case PIPE_BLENDFACTOR_DST_COLOR: +      /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ +      spe_fm(f, term2R_reg, fbR_reg, fbR_reg); +      spe_fm(f, term2G_reg, fbG_reg, fbG_reg); +      spe_fm(f, term2B_reg, fbB_reg, fbB_reg); +      break; +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) +       * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) +       * fnms(a,b,c,d) computes a = d - b*c +       */ +      spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); +      spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); +      spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); +      break; + +   case PIPE_BLENDFACTOR_DST_ALPHA: +      /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ +      spe_fm(f, term2R_reg, fbR_reg, fbA_reg); +      spe_fm(f, term2G_reg, fbG_reg, fbA_reg); +      spe_fm(f, term2B_reg, fbB_reg, fbA_reg); +      break; +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: +      /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb))  +       * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) +       * fnms(a,b,c,d) computes a = d - b*c +       */ +      spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); +      spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); +      spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); +      break; +   case PIPE_BLENDFACTOR_CONST_COLOR: +      /* We need the optional constant color registers */ +      setup_const_register(f, &constR_reg, blend_color->color[0]); +      setup_const_register(f, &constG_reg, blend_color->color[1]); +      setup_const_register(f, &constB_reg, 
blend_color->color[2]);
+      /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
+      spe_fm(f, term2R_reg, fbR_reg, constR_reg);
+      spe_fm(f, term2G_reg, fbG_reg, constG_reg);
+      spe_fm(f, term2B_reg, fbB_reg, constB_reg);
+      break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      /* we'll need the optional constant alpha register */
+      setup_const_register(f, &constA_reg, blend_color->color[3]);
+      /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
+      spe_fm(f, term2R_reg, fbR_reg, constA_reg);
+      spe_fm(f, term2G_reg, fbG_reg, constA_reg);
+      spe_fm(f, term2B_reg, fbB_reg, constA_reg);
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      /* We need the optional constant color registers */
+      setup_const_register(f, &constR_reg, blend_color->color[0]);
+      setup_const_register(f, &constG_reg, blend_color->color[1]);
+      setup_const_register(f, &constB_reg, blend_color->color[2]);
+      /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc))
+       * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
+       * fnms(a,b,c,d) computes a = d - b*c
+       */
+      spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg);
+      spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg);
+      spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg);
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      /* We need the optional constant alpha register */
+      setup_const_register(f, &constA_reg, blend_color->color[3]);
+      /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
+       * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
+       * fnms(a,b,c,d) computes a = d - b*c
+       */
+      spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg);
+      spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg);
+      spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg);
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */
+      ASSERT(0);
+      break;
+
+      /* These are special D3D cases involving a second color output
+       * from the fragment shader.  I'm not sure we can support them
+       * yet... XXX
+       */
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+
+   default:
+      ASSERT(0);
+   }
+
+   /*
+    * Compute Dest Alpha term.  Like the above, we're looking for
+    * the full term Afb*factor, not just the factor itself, because
+    * in many cases we can avoid doing unnecessary multiplies.
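+    *
+    * Note that PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE is not handled as a
+    * destination factor; it simply asserts below.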
+    */ +   switch (blend->alpha_dst_factor) { +   case PIPE_BLENDFACTOR_ONE: +      /* factor = 1, so term = Afb */ +      spe_move(f, term2A_reg, fbA_reg); +      break; +   case PIPE_BLENDFACTOR_ZERO: +      /* factor = 0, so term = 0 */ +      spe_load_float(f, term2A_reg, 0.0f); +      break; + +   case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_SRC_COLOR: +      /* factor = A, so term = Afb*A */ +      spe_fm(f, term2A_reg, fbA_reg, fragA_reg); +      break; + +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ +      /* fnms(a,b,c,d) computes a = d - b*c */ +      spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); +      break; + +   case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_DST_COLOR: +      /* factor = Afb, so term = Afb*Afb */ +      spe_fm(f, term2A_reg, fbA_reg, fbA_reg); +      break; + +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ +      /* fnms(a,b,c,d) computes a = d - b*c */ +      spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); +      break; + +   case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_CONST_COLOR: +      /* We need the optional constA_reg register */ +      setup_const_register(f, &constA_reg, blend_color->color[3]); +      /* factor = Ac, so term = Afb*Ac */ +      spe_fm(f, term2A_reg, fbA_reg, constA_reg); +      break; + +   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ +   case PIPE_BLENDFACTOR_INV_CONST_COLOR: +      /* We need the optional constA_reg register */ +      setup_const_register(f, &constA_reg, blend_color->color[3]); +      /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ +      /* fnms(a,b,c,d) computes a = d - b*c */ +      spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); +      break; + +   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ +      ASSERT(0); +      break; + +      /* These are special D3D cases involving a second color output +       * from the fragment shader.  I'm not sure we can support them +       * yet... XXX +       */ +   case PIPE_BLENDFACTOR_SRC1_COLOR: +   case PIPE_BLENDFACTOR_SRC1_ALPHA: +   case PIPE_BLENDFACTOR_INV_SRC1_COLOR: +   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: +   default: +      ASSERT(0); +   } + +   /* +    * Combine Src/Dest RGB terms as per the blend equation. 
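+    * E.g. PIPE_BLEND_ADD computes frag = term1 + term2, while
+    * PIPE_BLEND_REVERSE_SUBTRACT computes frag = term2 - term1.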
+    */ +   switch (blend->rgb_func) { +   case PIPE_BLEND_ADD: +      spe_fa(f, fragR_reg, term1R_reg, term2R_reg); +      spe_fa(f, fragG_reg, term1G_reg, term2G_reg); +      spe_fa(f, fragB_reg, term1B_reg, term2B_reg); +      break; +   case PIPE_BLEND_SUBTRACT: +      spe_fs(f, fragR_reg, term1R_reg, term2R_reg); +      spe_fs(f, fragG_reg, term1G_reg, term2G_reg); +      spe_fs(f, fragB_reg, term1B_reg, term2B_reg); +      break; +   case PIPE_BLEND_REVERSE_SUBTRACT: +      spe_fs(f, fragR_reg, term2R_reg, term1R_reg); +      spe_fs(f, fragG_reg, term2G_reg, term1G_reg); +      spe_fs(f, fragB_reg, term2B_reg, term1B_reg); +      break; +   case PIPE_BLEND_MIN: +      spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); +      spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); +      spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); +      break; +   case PIPE_BLEND_MAX: +      spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); +      spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); +      spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); +      break; +   default: +      ASSERT(0); +   } + +   /* +    * Combine Src/Dest A term +    */ +   switch (blend->alpha_func) { +   case PIPE_BLEND_ADD: +      spe_fa(f, fragA_reg, term1A_reg, term2A_reg); +      break; +   case PIPE_BLEND_SUBTRACT: +      spe_fs(f, fragA_reg, term1A_reg, term2A_reg); +      break; +   case PIPE_BLEND_REVERSE_SUBTRACT: +      spe_fs(f, fragA_reg, term2A_reg, term1A_reg); +      break; +   case PIPE_BLEND_MIN: +      spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); +      break; +   case PIPE_BLEND_MAX: +      spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); +      break; +   default: +      ASSERT(0); +   } + +   spe_release_register(f, term1R_reg); +   spe_release_register(f, term1G_reg); +   spe_release_register(f, term1B_reg); +   spe_release_register(f, term1A_reg); + +   spe_release_register(f, term2R_reg); +   spe_release_register(f, term2G_reg); +   spe_release_register(f, term2B_reg); +   spe_release_register(f, term2A_reg); + +   spe_release_register(f, fbR_reg); +   spe_release_register(f, fbG_reg); +   spe_release_register(f, fbB_reg); +   spe_release_register(f, fbA_reg); + +   spe_release_register(f, tmp_reg); + +   /* Free any optional registers that actually got used */ +   release_const_register(f, one_reg); +   release_const_register(f, constR_reg); +   release_const_register(f, constG_reg); +   release_const_register(f, constB_reg); +   release_const_register(f, constA_reg); +} + + +static void +gen_logicop(const struct pipe_blend_state *blend, +            struct spe_function *f, +            int fragRGBA_reg, int fbRGBA_reg) +{ +   /* We've got four 32-bit RGBA packed pixels in each of +    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point +    * reds, greens, blues, and alphas. 
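+    * The logic op is therefore applied directly to the packed 32-bit
+    * words, and the result is left in fragRGBA_reg.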
+    * */ +   ASSERT(blend->logicop_enable); + +   switch(blend->logicop_func) { +      case PIPE_LOGICOP_CLEAR: /* 0 */ +         spe_zero(f, fragRGBA_reg); +         break; +      case PIPE_LOGICOP_NOR: /* ~(s | d) */ +         spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ +         /* andc R, A, B computes R = A & ~B */ +         spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); +         break; +      case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ +         spe_complement(f, fragRGBA_reg, fragRGBA_reg); +         break; +      case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ +         /* andc R, A, B computes R = A & ~B */ +         spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_INVERT: /* ~d */ +         /* Note that (A nor A) == ~(A|A) == ~A */ +         spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_XOR: /* s ^ d */ +         spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_NAND: /* ~(s & d) */ +         spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_AND: /* s & d */ +         spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ +         spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         spe_complement(f, fragRGBA_reg, fragRGBA_reg); +         break; +      case PIPE_LOGICOP_NOOP: /* d */ +         spe_move(f, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ +         /* orc R, A, B computes R = A | ~B */ +         spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); +         break; +      case PIPE_LOGICOP_COPY: /* s */ +         break; +      case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ +         /* orc R, A, B computes R = A | ~B */ +         spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_OR: /* s | d */ +         spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); +         break; +      case PIPE_LOGICOP_SET: /* 1 */ +         spe_load_int(f, fragRGBA_reg, 0xffffffff); +         break; +      default: +         ASSERT(0); +   } +} + + +/** + * Generate code to pack a quad of float colors into four 32-bit integers. + * + * \param f             SPE function to append instruction onto. + * \param color_format  the dest color packing format + * \param r_reg         register containing four red values (in/clobbered) + * \param g_reg         register containing four green values (in/clobbered) + * \param b_reg         register containing four blue values (in/clobbered) + * \param a_reg         register containing four alpha values (in/clobbered) + * \param rgba_reg      register to store the packed RGBA colors (out) + */ +static void +gen_pack_colors(struct spe_function *f, +                enum pipe_format color_format, +                int r_reg, int g_reg, int b_reg, int a_reg, +                int rgba_reg) +{ +   int rg_reg = spe_allocate_available_register(f); +   int ba_reg = spe_allocate_available_register(f); + +   /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ +   spe_cfltu(f, r_reg, r_reg, 32); +   spe_cfltu(f, g_reg, g_reg, 32); +   spe_cfltu(f, b_reg, b_reg, 32); +   spe_cfltu(f, a_reg, a_reg, 32); + +   /* Shift the most significant bytes to the least significant positions. 
+    * I.e.: reg = reg >> 24 +    */ +   spe_rotmi(f, r_reg, r_reg, -24); +   spe_rotmi(f, g_reg, g_reg, -24); +   spe_rotmi(f, b_reg, b_reg, -24); +   spe_rotmi(f, a_reg, a_reg, -24); + +   /* Shift the color bytes according to the surface format */ +   if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { +      spe_roti(f, g_reg, g_reg, 8);   /* green <<= 8 */ +      spe_roti(f, r_reg, r_reg, 16);  /* red <<= 16 */ +      spe_roti(f, a_reg, a_reg, 24);  /* alpha <<= 24 */ +   } +   else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { +      spe_roti(f, r_reg, r_reg, 8);   /* red <<= 8 */ +      spe_roti(f, g_reg, g_reg, 16);  /* green <<= 16 */ +      spe_roti(f, b_reg, b_reg, 24);  /* blue <<= 24 */ +   } +   else { +      ASSERT(0); +   } + +   /* Merge red, green, blue, alpha registers to make packed RGBA colors. +    * Eg: after shifting according to color_format we might have: +    *     R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} +    *     G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} +    *     B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} +    *     A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} +    * OR-ing all those together gives us four packed colors: +    *  RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} +    */ +   spe_or(f, rg_reg, r_reg, g_reg); +   spe_or(f, ba_reg, a_reg, b_reg); +   spe_or(f, rgba_reg, rg_reg, ba_reg); + +   spe_release_register(f, rg_reg); +   spe_release_register(f, ba_reg); +} + + +static void +gen_colormask(struct spe_function *f, +              uint colormask, +              enum pipe_format color_format, +              int fragRGBA_reg, int fbRGBA_reg) +{ +   /* We've got four 32-bit RGBA packed pixels in each of +    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point +    * reds, greens, blues, and alphas.  Further, the pixels +    * are packed according to the given color format, not +    * necessarily RGBA... +    */ +   uint r_mask; +   uint g_mask; +   uint b_mask; +   uint a_mask; + +   /* Calculate exactly where the bits for any particular color +    * end up, so we can mask them correctly. +    */ +   switch(color_format) { +      case PIPE_FORMAT_A8R8G8B8_UNORM: +         /* ARGB */ +         a_mask = 0xff000000; +         r_mask = 0x00ff0000; +         g_mask = 0x0000ff00; +         b_mask = 0x000000ff; +         break; +      case PIPE_FORMAT_B8G8R8A8_UNORM: +         /* BGRA */ +         b_mask = 0xff000000; +         g_mask = 0x00ff0000; +         r_mask = 0x0000ff00; +         a_mask = 0x000000ff; +         break; +      default: +         ASSERT(0); +   } + +   /* For each R, G, B, and A component we're supposed to mask out,  +    * clear its bits.   Then our mask operation later will work  +    * as expected. +    */ +   if (!(colormask & PIPE_MASK_R)) { +      r_mask = 0; +   } +   if (!(colormask & PIPE_MASK_G)) { +      g_mask = 0; +   } +   if (!(colormask & PIPE_MASK_B)) { +      b_mask = 0; +   } +   if (!(colormask & PIPE_MASK_A)) { +      a_mask = 0; +   } + +   /* Get a temporary register to hold the mask that will be applied +    * to the fragment +    */ +   int colormask_reg = spe_allocate_available_register(f); + +   /* The actual mask we're going to use is an OR of the remaining R, G, B, +    * and A masks.  Load the result value into our temporary register. +    */ +   spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); + +   /* Use the mask register to select between the fragment color +    * values and the frame buffer color values.  
Wherever the +    * mask has a 0 bit, the current frame buffer color should override +    * the fragment color.  Wherever the mask has a 1 bit, the  +    * fragment color should persevere.  The Select Bits (selb rt, rA, rB, rM) +    * instruction will select bits from its first operand rA wherever the +    * the mask bits rM are 0, and from its second operand rB wherever the +    * mask bits rM are 1.  That means that the frame buffer color is the +    * first operand, and the fragment color the second. +    */ +    spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); + +    /* Release the temporary register and we're done */ +    spe_release_register(f, colormask_reg); +} + + +/** + * This function is annoyingly similar to gen_depth_test(), above, except + * that instead of comparing two varying values (i.e. fragment and buffer), + * we're comparing a varying value with a static value.  As such, we have + * access to the Compare Immediate instructions where we don't in  + * gen_depth_test(), which is what makes us very different. + * + * There's some added complexity if there's a non-trivial state->mask + * value; then stencil and reference both must be masked + * + * The return value in the stencil_pass_reg is a bitmask of valid + * fragments that also passed the stencil test.  The bitmask of valid + * fragments that failed would be found in + * (fragment_mask_reg & ~stencil_pass_reg). + */ +static void +gen_stencil_test(struct spe_function *f, +                 const struct pipe_stencil_state *state,  +                 uint stencil_max_value, +                 int fragment_mask_reg, +                 int fbS_reg,  +                 int stencil_pass_reg) +{ +   /* Generate code that puts the set of passing fragments into the +    * stencil_pass_reg register, taking into account whether each fragment +    * was active to begin with. 
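+    *
+    * Each case below follows the same basic pattern:
+    *    stencil_pass = fragment_mask & compare(s, reference)
+    * with s and reference first ANDed with state->valuemask when the
+    * mask does not cover all stencil bits.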
+    */ +   switch (state->func) { +   case PIPE_FUNC_EQUAL: +      if (state->valuemask == stencil_max_value) { +         /* stencil_pass = fragment_mask & (s == reference) */ +         spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); +         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +      } +      else { +         /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */ +         uint tmp_masked_stencil = spe_allocate_available_register(f); +         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); +         spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, +                                state->valuemask & state->ref_value); +         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_masked_stencil); +      } +      break; + +   case PIPE_FUNC_NOTEQUAL: +      if (state->valuemask == stencil_max_value) { +         /* stencil_pass = fragment_mask & ~(s == reference) */ +         spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); +         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +      } +      else { +         /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */ +         int tmp_masked_stencil = spe_allocate_available_register(f); +         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); +         spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, +                                state->valuemask & state->ref_value); +         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_masked_stencil); +      } +      break; + +   case PIPE_FUNC_LESS: +      if (state->valuemask == stencil_max_value) { +         /* stencil_pass = fragment_mask & (reference < s)  */ +         spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); +         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +      } +      else { +         /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */ +         int tmp_masked_stencil = spe_allocate_available_register(f); +         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); +         spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, +                                  state->valuemask & state->ref_value); +         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_masked_stencil); +      } +      break; + +   case PIPE_FUNC_GREATER: +      if (state->valuemask == stencil_max_value) { +         /* stencil_pass = fragment_mask & (reference > s) */ +         /* There's no convenient Compare Less Than Immediate instruction, so +          * we'll have to do this one the harder way, by loading a register and  +          * comparing directly.  Compare Logical Greater Than Word (clgt)  +          * treats its operands as unsigned - no sign extension. 
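+          * So the reference value is loaded into a temporary register and
+          * the test becomes clgt(reference, s).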
+          */ +         int tmp_reg = spe_allocate_available_register(f); +         spe_load_uint(f, tmp_reg, state->ref_value); +         spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); +         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_reg); +      } +      else { +         /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */ +         int tmp_reg = spe_allocate_available_register(f); +         int tmp_masked_stencil = spe_allocate_available_register(f); +         spe_load_uint(f, tmp_reg, state->valuemask & state->ref_value); +         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); +         spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); +         spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_reg); +         spe_release_register(f, tmp_masked_stencil); +      } +      break; + +   case PIPE_FUNC_GEQUAL: +      if (state->valuemask == stencil_max_value) { +         /* stencil_pass = fragment_mask & (reference >= s)  +          *              = fragment_mask & ~(s > reference) */ +         spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, +                                  state->ref_value); +         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +      } +      else { +         /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */ +         int tmp_masked_stencil = spe_allocate_available_register(f); +         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); +         spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, +                                  state->valuemask & state->ref_value); +         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_masked_stencil); +      } +      break; + +   case PIPE_FUNC_LEQUAL: +      if (state->valuemask == stencil_max_value) { +         /* stencil_pass = fragment_mask & (reference <= s) ] +          *               = fragment_mask & ~(reference > s) */ +         /* As above, we have to do this by loading a register */ +         int tmp_reg = spe_allocate_available_register(f); +         spe_load_uint(f, tmp_reg, state->ref_value); +         spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); +         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_reg); +      } +      else { +         /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */ +         int tmp_reg = spe_allocate_available_register(f); +         int tmp_masked_stencil = spe_allocate_available_register(f); +         spe_load_uint(f, tmp_reg, state->ref_value & state->valuemask); +         spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); +         spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); +         spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); +         spe_release_register(f, tmp_reg); +         spe_release_register(f, tmp_masked_stencil); +      } +      break; + +   case PIPE_FUNC_NEVER: +      /* stencil_pass = fragment_mask & 0 = 0 */ +      spe_load_uint(f, stencil_pass_reg, 0); +      break; + +   case PIPE_FUNC_ALWAYS: +      /* stencil_pass = fragment_mask & 1 = fragment_mask */ +      spe_move(f, stencil_pass_reg, fragment_mask_reg); +      break; +   } + +   /* The fragments that passed the stencil test are now in stencil_pass_reg. 
+    * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg). +    */ +} + + +/** + * This function generates code that calculates a set of new stencil values + * given the earlier values and the operation to apply.  It does not + * apply any tests.  It is intended to be called up to 3 times + * (for the stencil fail operation, for the stencil pass-z fail operation, + * and for the stencil pass-z pass operation) to collect up to three + * possible sets of values, and for the caller to combine them based + * on the result of the tests. + * + * stencil_max_value should be (2^n - 1) where n is the number of bits + * in the stencil buffer - in other words, it should be usable as a mask. + */ +static void +gen_stencil_values(struct spe_function *f, +                   uint stencil_op, +                   uint stencil_ref_value, +                   uint stencil_max_value, +                   int fbS_reg, +                   int newS_reg) +{ +   /* The code below assumes that newS_reg and fbS_reg are not the same +    * register; if they can be, the calculations below will have to use +    * an additional temporary register.  For now, mark the assumption +    * with an assertion that will fail if they are the same. +    */ +   ASSERT(fbS_reg != newS_reg); + +   /* The code also assumes the the stencil_max_value is of the form  +    * 2^n-1 and can therefore be used as a mask for the valid bits in  +    * addition to a maximum.  Make sure this is the case as well. +    * The clever math below exploits the fact that incrementing a  +    * binary number serves to flip all the bits of a number starting at +    * the LSB and continuing to (and including) the first zero bit +    * found.  That means that a number and its increment will always +    * have at least one bit in common (the high order bit, if nothing +    * else) *unless* the number is zero, *or* the number is of a form +    * consisting of some number of 1s in the low-order bits followed +    * by nothing but 0s in the high-order bits.  The latter case +    * implies it's of the form 2^n-1. +    */ +   ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); + +   switch(stencil_op) { +   case PIPE_STENCIL_OP_KEEP: +      /* newS = S */ +      spe_move(f, newS_reg, fbS_reg); +      break; + +   case PIPE_STENCIL_OP_ZERO: +      /* newS = 0 */ +      spe_zero(f, newS_reg); +      break; + +   case PIPE_STENCIL_OP_REPLACE: +      /* newS = stencil reference value */ +      spe_load_uint(f, newS_reg, stencil_ref_value); +      break; + +   case PIPE_STENCIL_OP_INCR: { +      /* newS = (s == max ? max : s + 1) */ +      int equals_reg = spe_allocate_available_register(f); + +      spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); +      /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ +      spe_ai(f, newS_reg, fbS_reg, 1); +      /* Select from the current value or the new value based on the equality test */ +      spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); + +      spe_release_register(f, equals_reg); +      break; +   } +   case PIPE_STENCIL_OP_DECR: { +      /* newS = (s == 0 ? 
0 : s - 1) */ +      int equals_reg = spe_allocate_available_register(f); + +      spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); +      /* Add Word Immediate with a (-1) value works */ +      spe_ai(f, newS_reg, fbS_reg, -1); +      /* Select from the current value or the new value based on the equality test */ +      spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); + +      spe_release_register(f, equals_reg); +      break; +   } +   case PIPE_STENCIL_OP_INCR_WRAP: +      /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can +       * do a normal add and mask off the correct bits  +       */ +      spe_ai(f, newS_reg, fbS_reg, 1); +      spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); +      break; + +   case PIPE_STENCIL_OP_DECR_WRAP: +      /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ +      spe_ai(f, newS_reg, fbS_reg, -1); +      spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); +      break; + +   case PIPE_STENCIL_OP_INVERT: +      /* newS = ~s.  We take advantage of the mask/max value to invert only +       * the valid bits for the field so we don't have to do an extra "and". +       */ +      spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); +      break; + +   default: +      ASSERT(0); +   } +} + + +/** + * This function generates code to get all the necessary possible + * stencil values.  For each of the output registers (fail_reg, + * zfail_reg, and zpass_reg), it either allocates a new register + * and calculates a new set of values based on the stencil operation, + * or it reuses a register allocation and calculation done for an + * earlier (matching) operation, or it reuses the fbS_reg register + * (if the stencil operation is KEEP, which doesn't change the  + * stencil buffer). + * + * Since this function allocates a variable number of registers, + * to avoid incurring complex logic to free them, they should + * be allocated after a spe_allocate_register_set() call + * and released by the corresponding spe_release_register_set() call. + */ +static void +gen_get_stencil_values(struct spe_function *f, +                       const struct pipe_stencil_state *stencil, +                       const uint depth_enabled, +                       int fbS_reg,  +                       int *fail_reg, +                       int *zfail_reg,  +                       int *zpass_reg) +{ +   uint zfail_op; + +   /* Stenciling had better be enabled here */ +   ASSERT(stencil->enabled); + +   /* If the depth test is not enabled, it is treated as though it always +    * passes, which means that the zfail_op is not considered - a +    * failing stencil test triggers the fail_op, and a passing one +    * triggers the zpass_op +    * +    * As an optimization, override calculation of the zfail_op values +    * if they aren't going to be used.  By setting the value of +    * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed +    * to match the incoming stencil values, and no calculation will +    * be done. 
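+    * (For example: with the depth test disabled and fail_op == zpass_op ==
+    * PIPE_STENCIL_OP_INCR, only one set of INCR values is computed;
+    * fail_reg and zpass_reg share that register, and zfail_reg simply
+    * aliases fbS_reg.)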
+    */ +   if (depth_enabled) { +      zfail_op = stencil->zfail_op; +   } +   else { +      zfail_op = PIPE_STENCIL_OP_KEEP; +   } + +   /* One-sided or front-facing stencil */ +   if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { +      *fail_reg = fbS_reg; +   } +   else { +      *fail_reg = spe_allocate_available_register(f); +      gen_stencil_values(f, stencil->fail_op, stencil->ref_value,  +         0xff, fbS_reg, *fail_reg); +   } + +   /* Check the possibly overridden value, not the structure value */ +   if (zfail_op == PIPE_STENCIL_OP_KEEP) { +      *zfail_reg = fbS_reg; +   } +   else if (zfail_op == stencil->fail_op) { +      *zfail_reg = *fail_reg; +   } +   else { +      *zfail_reg = spe_allocate_available_register(f); +      gen_stencil_values(f, stencil->zfail_op, stencil->ref_value,  +         0xff, fbS_reg, *zfail_reg); +   } + +   if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { +      *zpass_reg = fbS_reg; +   } +   else if (stencil->zpass_op == stencil->fail_op) { +      *zpass_reg = *fail_reg; +   } +   else if (stencil->zpass_op == zfail_op) { +      *zpass_reg = *zfail_reg; +   } +   else { +      *zpass_reg = spe_allocate_available_register(f); +      gen_stencil_values(f, stencil->zpass_op, stencil->ref_value,  +         0xff, fbS_reg, *zpass_reg); +   } +} + +/** + * Note that fbZ_reg may *not* be set on entry, if in fact + * the depth test is not enabled.  This function must not use + * the register if depth is not enabled. + */ +static boolean +gen_stencil_depth_test(struct spe_function *f,  +                       const struct pipe_depth_stencil_alpha_state *dsa,  +                       const uint facing, +                       const int mask_reg, const int fragZ_reg,  +                       const int fbZ_reg, const int fbS_reg) +{ +   /* True if we've generated code that could require writeback to the +    * depth and/or stencil buffers +    */ +   boolean modified_buffers = FALSE; + +   boolean need_to_calculate_stencil_values; +   boolean need_to_writemask_stencil_values; + +   struct pipe_stencil_state *stencil; + +   /* Registers.  We may or may not actually allocate these, depending +    * on whether the state values indicate that we need them. +    */ +   int stencil_pass_reg, stencil_fail_reg; +   int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; +   int stencil_writemask_reg; +   int zmask_reg; +   int newS_reg; + +   /* Stenciling is quite complex: up to six different configurable stencil  +    * operations/calculations can be required (three each for front-facing +    * and back-facing fragments).  Many of those operations will likely  +    * be identical, so there's good reason to try to avoid calculating  +    * the same values more than once (which unfortunately makes the code less  +    * straightforward). +    * +    * To make register management easier, we start a new  +    * register set; we can release all the registers in the set at +    * once, and avoid having to keep track of exactly which registers +    * we allocate.  We can still allocate and free registers as  +    * desired (if we know we no longer need a register), but we don't +    * have to spend the complexity to track the more difficult variant +    * register usage scenarios. +    */ +   spe_comment(f, 0, "Allocating stencil register set"); +   spe_allocate_register_set(f); + +   /* The facing we're given is the fragment facing; it doesn't +    * exactly match the stencil facing.  
If stencil is enabled, +    * but two-sided stencil is *not* enabled, we use the same +    * stencil settings for both front- and back-facing fragments. +    * We only use the "back-facing" stencil for backfacing fragments +    * if two-sided stenciling is enabled. +    */ +   if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { +      stencil = &dsa->stencil[1]; +   } +   else { +      stencil = &dsa->stencil[0]; +   } + +   /* Calculate the writemask.  If the writemask is trivial (either +    * all 0s, meaning that we don't need to calculate any stencil values +    * because they're not going to change the stencil anyway, or all 1s, +    * meaning that we have to calculate the stencil values but do not +    * need to mask them), we can avoid generating code.  Don't forget +    * that we need to consider backfacing stencil, if enabled. +    * +    * Note that if the backface stencil is *not* enabled, the backface +    * stencil will have the same values as the frontface stencil. +    */ +   if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && +       stencil->zfail_op == PIPE_STENCIL_OP_KEEP && +       stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { +       need_to_calculate_stencil_values = FALSE; +       need_to_writemask_stencil_values = FALSE; +    } +    else if (stencil->writemask == 0x0) { +      /* All changes are writemasked out, so no need to calculate +       * what those changes might be, and no need to write anything back. +       */ +      need_to_calculate_stencil_values = FALSE; +      need_to_writemask_stencil_values = FALSE; +   } +   else if (stencil->writemask == 0xff) { +      /* Still trivial, but a little less so.  We need to write the stencil +       * values, but we don't need to mask them. +       */ +      need_to_calculate_stencil_values = TRUE; +      need_to_writemask_stencil_values = FALSE; +   } +   else { +      /* The general case: calculate, mask, and write */ +      need_to_calculate_stencil_values = TRUE; +      need_to_writemask_stencil_values = TRUE; + +      /* While we're here, generate code that calculates what the +       * writemask should be.  If backface stenciling is enabled, +       * and the backface writemask is not the same as the frontface +       * writemask, we'll have to generate code that merges the +       * two masks into a single effective mask based on fragment facing. +       */ +      spe_comment(f, 0, "Computing stencil writemask"); +      stencil_writemask_reg = spe_allocate_available_register(f); +      spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask); +   } + +   /* At least one-sided stenciling must be on.  Generate code that +    * runs the stencil test on the basic/front-facing stencil, leaving +    * the mask of passing stencil bits in stencil_pass_reg.  This mask will +    * be used both to mask the set of active pixels, and also to +    * determine how the stencil buffer changes. +    * +    * This test will *not* change the value in mask_reg (because we don't +    * yet know whether to apply the two-sided stencil or one-sided stencil). +    */ +   spe_comment(f, 0, "Running basic stencil test"); +   stencil_pass_reg = spe_allocate_available_register(f); +   gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg); + +   /* Generate code that, given the mask of valid fragments and the +    * mask of valid fragments that passed the stencil test, computes +    * the mask of valid fragments that failed the stencil test.  
We +    * have to do this before we run a depth test (because the +    * depth test should not be performed on fragments that failed the +    * stencil test, and because the depth test will update the  +    * mask of valid fragments based on the results of the depth test). +    */ +   spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); +   stencil_fail_reg = spe_allocate_available_register(f); +   spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); +   /* Now remove the stenciled-out pixels from the valid fragment mask, +    * so we can later use the valid fragment mask in the depth test. +    */ +   spe_and(f, mask_reg, mask_reg, stencil_pass_reg); + +   /* We may not need to calculate stencil values, if the writemask is off */ +   if (need_to_calculate_stencil_values) { +      /* Generate code that calculates exactly which stencil values we need, +       * without calculating the same value twice (say, if two different +       * stencil ops have the same value).  This code will work for one-sided +       * and two-sided stenciling (so that we take into account that operations +       * may match between front and back stencils), and will also take into +       * account whether the depth test is enabled (if the depth test is off, +       * we don't need any of the zfail results, because the depth test always +       * is considered to pass if it is disabled).  Any register value that +       * does not need to be calculated will come back with the same value +       * that's in fbS_reg. +       * +       * This function will allocate a variant number of registers that +       * will be released as part of the register set. +       */ +      spe_comment(f, 0, facing == CELL_FACING_FRONT +                  ? "Computing front-facing stencil values" +                  : "Computing back-facing stencil values"); +      gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg,  +         &stencil_fail_values, &stencil_pass_depth_fail_values,  +         &stencil_pass_depth_pass_values); +   }   + +   /* We now have all the stencil values we need.  We also need  +    * the results of the depth test to figure out which +    * stencil values will become the new stencil values.  (Even if +    * we aren't actually calculating stencil values, we need to apply +    * the depth test if it's enabled.) +    * +    * The code generated by gen_depth_test() returns the results of the +    * test in the given register, but also alters the mask_reg based +    * on the results of the test. +    */ +   if (dsa->depth.enabled) { +      spe_comment(f, 0, "Running stencil depth test"); +      zmask_reg = spe_allocate_available_register(f); +      modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, +                                         fbZ_reg, zmask_reg); +   } + +   if (need_to_calculate_stencil_values) { + +      /* If we need to writemask the stencil values before going into +       * the stencil buffer, we'll have to use a new register to +       * hold the new values.  If not, we can just keep using the +       * current register. 
+       */ +      if (need_to_writemask_stencil_values) { +         newS_reg = spe_allocate_available_register(f); +         spe_comment(f, 0, "Saving current stencil values for writemasking"); +         spe_move(f, newS_reg, fbS_reg); +      } +      else { +         newS_reg = fbS_reg; +      } + +      /* Merge in the selected stencil fail values */ +      if (stencil_fail_values != fbS_reg) { +         spe_comment(f, 0, "Loading stencil fail values"); +         spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); +         modified_buffers = TRUE; +      } + +      /* Same for the stencil pass/depth fail values.  If this calculation +       * is not needed (say, if depth test is off), then the +       * stencil_pass_depth_fail_values register will be equal to fbS_reg +       * and we'll skip the calculation. +       */ +      if (stencil_pass_depth_fail_values != fbS_reg) { +         /* We don't actually have a stencil pass/depth fail mask yet. +          * Calculate it here from the stencil passing mask and the +          * depth passing mask.  Note that zmask_reg *must* have been +          * set above if we're here. +          */ +         uint stencil_pass_depth_fail_mask = +            spe_allocate_available_register(f); + +         spe_comment(f, 0, "Loading stencil pass/depth fail values"); +         spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); + +         spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, +                  stencil_pass_depth_fail_mask); + +         spe_release_register(f, stencil_pass_depth_fail_mask); +         modified_buffers = TRUE; +      } + +      /* Same for the stencil pass/depth pass mask.  Note that we +       * *can* get here with zmask_reg being unset (if the depth +       * test is off but the stencil test is on).  In this case, +       * we assume the depth test passes, and don't need to mask +       * the stencil pass mask with the Z mask. +       */ +      if (stencil_pass_depth_pass_values != fbS_reg) { +         if (dsa->depth.enabled) { +            uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f); +            /* We'll need a separate register */ +            spe_comment(f, 0, "Loading stencil pass/depth pass values"); +            spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); +            spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); +            spe_release_register(f, stencil_pass_depth_pass_mask); +         } +         else { +            /* We can use the same stencil-pass register */ +            spe_comment(f, 0, "Loading stencil pass values"); +            spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); +         } +         modified_buffers = TRUE; +      } + +      /* Almost done.  If we need to writemask, do it now, leaving the +       * results in the fbS_reg register passed in.  If we don't need +       * to writemask, then the results are *already* in the fbS_reg, +       * so there's nothing more to do. +       */ + +      if (need_to_writemask_stencil_values && modified_buffers) { +         /* The Select Bytes command makes a fine writemask.  Where +          * the mask is 0, the first (original) values are retained, +          * effectively masking out changes.  Where the mask is 1, the +          * second (new) values are retained, incorporating changes. 
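+          * (Per bit, selb(d, a, b, m) computes d = (a & ~m) | (b & m), so the
+          * writemask register picks the old stencil bits where the mask is 0
+          * and the new bits where it is 1.)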
+          */ +         spe_comment(f, 0, "Writemasking new stencil values"); +         spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); +      } + +   } /* done calculating stencil values */ + +   /* The stencil and/or depth values have been applied, and the +    * mask_reg, fbS_reg, and fbZ_reg values have been updated. +    * We're all done, except that we've allocated a fair number +    * of registers that we didn't bother tracking.  Release all +    * those registers as part of the register set, and go home. +    */ +   spe_comment(f, 0, "Releasing stencil register set"); +   spe_release_register_set(f); + +   /* Return TRUE if we could have modified the stencil and/or +    * depth buffers. +    */ +   return modified_buffers; +} + + +/** + * Generate depth and/or stencil test code. + * \param cell  context + * \param dsa  depth/stencil/alpha state + * \param f  spe function to emit + * \param facing  either CELL_FACING_FRONT or CELL_FACING_BACK + * \param mask_reg  register containing the pixel alive/dead mask + * \param depth_tile_reg  register containing address of z/stencil tile + * \param quad_offset_reg  offset to quad from start of tile + * \param fragZ_reg  register containg fragment Z values + */ +static void +gen_depth_stencil(struct cell_context *cell, +                  const struct pipe_depth_stencil_alpha_state *dsa, +                  struct spe_function *f, +                  uint facing, +                  int mask_reg, +                  int depth_tile_reg, +                  int quad_offset_reg, +                  int fragZ_reg) + +{ +   const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; +   boolean write_depth_stencil; + +   /* framebuffer's combined z/stencil values register */ +   int fbZS_reg = spe_allocate_available_register(f); + +   /* Framebufer Z values register */ +   int fbZ_reg = spe_allocate_available_register(f); + +   /* Framebuffer stencil values register (may not be used) */ +   int fbS_reg = spe_allocate_available_register(f); + +   /* 24-bit mask register (may not be used) */ +   int zmask_reg = spe_allocate_available_register(f); + +   /** +    * The following code: +    * 1. fetch quad of packed Z/S values from the framebuffer tile. +    * 2. extract the separate the Z and S values from packed values +    * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints +    * +    * The instructions for doing this are interleaved for better performance. 
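+    * (Worked example, assuming cfltu scales by 2^32 here: fragZ = 0.5 becomes
+    * 0x80000000, and the rotmi by -8 used for the 24-bit formats leaves
+    * 0x00800000, i.e. 0.5 scaled to 24 bits.)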
+    */ +   spe_comment(f, 0, "Fetch Z/stencil quad from tile"); + +   switch(zs_format) { +   case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ +   case PIPE_FORMAT_X8Z24_UNORM: +      /* prepare mask to extract Z vals from ZS vals */ +      spe_load_uint(f, zmask_reg, 0x00ffffff); + +      /* convert fragment Z from [0,1] to 32-bit ints */ +      spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + +      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ +      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + +      /* right shift 32-bit fragment Z to 24 bits */ +      spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + +      /* extract 24-bit Z values from ZS values by masking */ +      spe_and(f, fbZ_reg, fbZS_reg, zmask_reg); + +      /* extract 8-bit stencil values by shifting */ +      spe_rotmi(f, fbS_reg, fbZS_reg, -24); +      break; + +   case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ +   case PIPE_FORMAT_Z24X8_UNORM: +      /* convert fragment Z from [0,1] to 32-bit ints */ +      spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + +      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ +      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + +      /* right shift 32-bit fragment Z to 24 bits */ +      spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + +      /* extract 24-bit Z values from ZS values by shifting */ +      spe_rotmi(f, fbZ_reg, fbZS_reg, -8); + +      /* extract 8-bit stencil values by masking */ +      spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); +      break; + +   case PIPE_FORMAT_Z32_UNORM: +      /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */ +      spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg); + +      /* convert fragment Z from [0,1] to 32-bit ints */ +      spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + +      /* No stencil, so can't do anything there */ +      break; + +   case PIPE_FORMAT_Z16_UNORM: +      /* XXX This code for 16bpp Z is broken! */ + +      /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ +      spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + +      /* Copy over 4 32-bit values */ +      spe_move(f, fbZ_reg, fbZS_reg); + +      /* convert Z from [0,1] to 16-bit ints */ +      spe_cfltu(f, fragZ_reg, fragZ_reg, 32); +      spe_rotmi(f, fragZ_reg, fragZ_reg, -16); +      /* No stencil */ +      break; + +   default: +      ASSERT(0); /* invalid format */ +   } + +   /* If stencil is enabled, use the stencil-specific code +    * generator to generate both the stencil and depth (if needed) +    * tests.  Otherwise, if only depth is enabled, generate +    * a quick depth test.  The test generators themselves will +    * report back whether the depth/stencil buffer has to be +    * written back. +    */ +   if (dsa->stencil[0].enabled) { +      /* This will perform the stencil and depth tests, and update +       * the mask_reg, fbZ_reg, and fbS_reg as required by the +       * tests. +       */ +      ASSERT(fbS_reg >= 0); +      spe_comment(f, 0, "Perform stencil test"); + +      /* Note that fbZ_reg may not be set on entry, if stenciling +       * is enabled but there's no Z-buffer.  The  +       * gen_stencil_depth_test() function must ignore the +       * fbZ_reg register if depth is not enabled. 
+       */ +      write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, +                                                   mask_reg, fragZ_reg, +                                                   fbZ_reg, fbS_reg); +   } +   else if (dsa->depth.enabled) { +      int zmask_reg = spe_allocate_available_register(f); +      ASSERT(fbZ_reg >= 0); +      spe_comment(f, 0, "Perform depth test"); +      write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, +                                           fbZ_reg, zmask_reg); +      spe_release_register(f, zmask_reg); +   } +   else { +      write_depth_stencil = FALSE; +   } + +   if (write_depth_stencil) { +      /* Merge latest Z and Stencil values into fbZS_reg. +       * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. +       * fbS_reg has four 8-bit Z values in bits [7..0]. +       */ +      spe_comment(f, 0, "Store quad's depth/stencil values in tile"); +      if (zs_format == PIPE_FORMAT_S8Z24_UNORM || +          zs_format == PIPE_FORMAT_X8Z24_UNORM) { +         spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ +         spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ +      } +      else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || +               zs_format == PIPE_FORMAT_Z24X8_UNORM) { +         spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ +         spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ +      } +      else if (zs_format == PIPE_FORMAT_Z32_UNORM) { +         spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ +      } +      else if (zs_format == PIPE_FORMAT_Z16_UNORM) { +         spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ +      } +      else if (zs_format == PIPE_FORMAT_S8_UNORM) { +         ASSERT(0);   /* XXX to do */ +      } +      else { +         ASSERT(0); /* bad zs_format */ +      } + +      /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ +      spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); +   } + +   /* Don't need these any more */ +   spe_release_register(f, fbZS_reg); +   spe_release_register(f, fbZ_reg); +   spe_release_register(f, fbS_reg); +   spe_release_register(f, zmask_reg); +} + + + +/** + * Generate SPE code to implement the fragment operations (alpha test, + * depth test, stencil test, blending, colormask, and final + * framebuffer write) as specified by the current context state. + * + * Logically, this code will be called after running the fragment + * shader.  But under some circumstances we could run some of this + * code before the fragment shader to cull fragments/quads that are + * totally occluded/discarded. + * + * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. + * + * See the spu_default_fragment_ops() function to see how the per-fragment + * operations would be done with ordinary C code. + * The code we generate here though has no branches, is SIMD, etc and + * should be much faster. + * + * \param cell  the rendering context (in) + * \param facing whether the generated code is for front-facing or  + *              back-facing fragments + * \param f     the generated function (in/out); on input, the function + *              must already have been initialized.  On exit, whatever + *              instructions within the generated function have had + *              the fragment ops appended. 
+ */ +void +cell_gen_fragment_function(struct cell_context *cell, +                           const uint facing, +                           struct spe_function *f) +{ +   const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; +   const struct pipe_blend_state *blend = cell->blend; +   const struct pipe_blend_color *blend_color = &cell->blend_color; +   const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; + +   /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ +   const int x_reg = 3;  /* uint */ +   const int y_reg = 4;  /* uint */ +   const int color_tile_reg = 5;  /* tile_t * */ +   const int depth_tile_reg = 6;  /* tile_t * */ +   const int fragZ_reg = 7;   /* vector float */ +   const int fragR_reg = 8;   /* vector float */ +   const int fragG_reg = 9;   /* vector float */ +   const int fragB_reg = 10;  /* vector float */ +   const int fragA_reg = 11;  /* vector float */ +   const int mask_reg = 12;   /* vector uint */ + +   ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); + +   /* offset of quad from start of tile +    * XXX assuming 4-byte pixels for color AND Z/stencil!!!! +    */ +   int quad_offset_reg; + +   int fbRGBA_reg;  /**< framebuffer's RGBA colors for quad */ + +   if (cell->debug_flags & CELL_DEBUG_ASM) { +      spe_print_code(f, TRUE); +      spe_indent(f, 8); +      spe_comment(f, -4, facing == CELL_FACING_FRONT +                  ? "Begin front-facing per-fragment ops" +                  : "Begin back-facing per-fragment ops"); +   } + +   spe_allocate_register(f, x_reg); +   spe_allocate_register(f, y_reg); +   spe_allocate_register(f, color_tile_reg); +   spe_allocate_register(f, depth_tile_reg); +   spe_allocate_register(f, fragZ_reg); +   spe_allocate_register(f, fragR_reg); +   spe_allocate_register(f, fragG_reg); +   spe_allocate_register(f, fragB_reg); +   spe_allocate_register(f, fragA_reg); +   spe_allocate_register(f, mask_reg); + +   quad_offset_reg = spe_allocate_available_register(f); +   fbRGBA_reg = spe_allocate_available_register(f); + +   /* compute offset of quad from start of tile, in bytes */ +   { +      int x2_reg = spe_allocate_available_register(f); +      int y2_reg = spe_allocate_available_register(f); + +      ASSERT(TILE_SIZE == 32); + +      spe_comment(f, 0, "Compute quad offset within tile"); +      spe_rotmi(f, y2_reg, y_reg, -1);  /* y2 = y / 2 */ +      spe_rotmi(f, x2_reg, x_reg, -1);  /* x2 = x / 2 */ +      spe_shli(f, y2_reg, y2_reg, 4);   /* y2 *= 16 */ +      spe_a(f, quad_offset_reg, y2_reg, x2_reg);  /* offset = y2 + x2 */ +      spe_shli(f, quad_offset_reg, quad_offset_reg, 4);   /* offset *= 16 */ + +      spe_release_register(f, x2_reg); +      spe_release_register(f, y2_reg); +   } + +   /* Generate the alpha test, if needed. */ +   if (dsa->alpha.enabled) { +      gen_alpha_test(dsa, f, mask_reg, fragA_reg); +   } + +   /* generate depth and/or stencil test code */ +   if (dsa->depth.enabled || dsa->stencil[0].enabled) { +      gen_depth_stencil(cell, dsa, f, +                        facing, +                        mask_reg, +                        depth_tile_reg, +                        quad_offset_reg, +                        fragZ_reg); +   } + +   /* Get framebuffer quad/colors.  We'll need these for blending, +    * color masking, and to obey the quad/pixel mask. +    * Load: fbRGBA_reg = memory[color_tile + quad_offset] +    * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking +    * we could skip this load. 
+    */ +   spe_comment(f, 0, "Fetch quad colors from tile"); +   spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); + +   if (blend->blend_enable) { +      spe_comment(f, 0, "Perform blending"); +      gen_blend(blend, blend_color, f, color_format, +                fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); +   } + +   /* +    * Write fragment colors to framebuffer/tile. +    * This involves converting the fragment colors from float[4] to the +    * tile's specific format and obeying the quad/pixel mask. +    */ +   { +      int rgba_reg = spe_allocate_available_register(f); + +      /* Pack four float colors as four 32-bit int colors */ +      spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); +      gen_pack_colors(f, color_format, +                      fragR_reg, fragG_reg, fragB_reg, fragA_reg, +                      rgba_reg); + +      if (blend->logicop_enable) { +         spe_comment(f, 0, "Compute logic op"); +         gen_logicop(blend, f, rgba_reg, fbRGBA_reg); +      } + +      if (blend->colormask != PIPE_MASK_RGBA) { +         spe_comment(f, 0, "Compute color mask"); +         gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); +      } + +      /* Mix fragment colors with framebuffer colors using the quad/pixel mask: +       * if (mask[i]) +       *    rgba[i] = rgba[i]; +       * else +       *    rgba[i] = framebuffer[i]; +       */ +      spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); + +      /* Store updated quad in tile: +       * memory[color_tile + quad_offset] = rgba_reg; +       */ +      spe_comment(f, 0, "Store quad colors into color tile"); +      spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); + +      spe_release_register(f, rgba_reg); +   } + +   //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); + +   spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */ + +   spe_release_register(f, fbRGBA_reg); +   spe_release_register(f, quad_offset_reg); + +   if (cell->debug_flags & CELL_DEBUG_ASM) { +      char buffer[1024]; +      sprintf(buffer, "End %s-facing per-fragment ops: %d instructions",  +         facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst); +      spe_comment(f, -4, buffer); +   } +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h new file mode 100644 index 0000000000..21b35d1faf --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -0,0 +1,38 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_GEN_FRAGMENT_H +#define CELL_GEN_FRAGMENT_H + + +extern void +cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f); + + +#endif /* CELL_GEN_FRAGMENT_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c new file mode 100644 index 0000000000..ca358ed031 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -0,0 +1,358 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: + *  Keith Whitwell <keith@tungstengraphics.com> + *  Brian Paul + */ + +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "draw/draw_context.h" +#include "cell_context.h" +#include "cell_flush.h" +#include "cell_pipe_state.h" +#include "cell_state.h" +#include "cell_texture.h" + + + +static void * +cell_create_blend_state(struct pipe_context *pipe, +                        const struct pipe_blend_state *blend) +{ +   return mem_dup(blend, sizeof(*blend)); +} + + +static void +cell_bind_blend_state(struct pipe_context *pipe, void *blend) +{ +   struct cell_context *cell = cell_context(pipe); + +   draw_flush(cell->draw); + +   cell->blend = (struct pipe_blend_state *) blend; +   cell->dirty |= CELL_NEW_BLEND; +} + + +static void +cell_delete_blend_state(struct pipe_context *pipe, void *blend) +{ +   FREE(blend); +} + + +static void +cell_set_blend_color(struct pipe_context *pipe, +                     const struct pipe_blend_color *blend_color) +{ +   struct cell_context *cell = cell_context(pipe); + +   draw_flush(cell->draw); + +   cell->blend_color = *blend_color; + +   cell->dirty |= CELL_NEW_BLEND; +} + + + + +static void * +cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, +                 const struct pipe_depth_stencil_alpha_state *dsa) +{ +   return mem_dup(dsa, sizeof(*dsa)); +} + + +static void +cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, +                                    void *dsa) +{ +   struct cell_context *cell = cell_context(pipe); + +   draw_flush(cell->draw); + +   cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; +   cell->dirty |= CELL_NEW_DEPTH_STENCIL; +} + + +static void +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) +{ +   FREE(dsa); +} + + +static void +cell_set_clip_state(struct pipe_context *pipe, +                    const struct pipe_clip_state *clip) +{ +   struct cell_context *cell = cell_context(pipe); + +   /* pass the clip state to the draw module */ +   draw_set_clip_state(cell->draw, clip); +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void +cell_set_viewport_state( struct pipe_context *pipe, +                         const struct pipe_viewport_state *viewport ) +{ +   struct cell_context *cell = cell_context(pipe); + +   cell->viewport = *viewport; /* struct copy */ +   cell->dirty |= CELL_NEW_VIEWPORT; + +   /* pass the viewport info to the draw module */ +   draw_set_viewport_state(cell->draw, viewport); + +   /* Using tnl/ and vf/ modules is temporary while getting started. +    * Full pipe will have vertex shader, vertex fetch of its own. 
+    */ +} + + +static void +cell_set_scissor_state( struct pipe_context *pipe, +                        const struct pipe_scissor_state *scissor ) +{ +   struct cell_context *cell = cell_context(pipe); + +   memcpy( &cell->scissor, scissor, sizeof(*scissor) ); +   cell->dirty |= CELL_NEW_SCISSOR; +} + + +static void +cell_set_polygon_stipple( struct pipe_context *pipe, +                          const struct pipe_poly_stipple *stipple ) +{ +   struct cell_context *cell = cell_context(pipe); + +   memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); +   cell->dirty |= CELL_NEW_STIPPLE; +} + + + +static void * +cell_create_rasterizer_state(struct pipe_context *pipe, +                             const struct pipe_rasterizer_state *rasterizer) +{ +   return mem_dup(rasterizer, sizeof(*rasterizer)); +} + + +static void +cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast) +{ +   struct pipe_rasterizer_state *rasterizer = +      (struct pipe_rasterizer_state *) rast; +   struct cell_context *cell = cell_context(pipe); + +   /* pass-through to draw module */ +   draw_set_rasterizer_state(cell->draw, rasterizer); + +   cell->rasterizer = rasterizer; + +   cell->dirty |= CELL_NEW_RASTERIZER; +} + + +static void +cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ +   FREE(rasterizer); +} + + + +static void * +cell_create_sampler_state(struct pipe_context *pipe, +                          const struct pipe_sampler_state *sampler) +{ +   return mem_dup(sampler, sizeof(*sampler)); +} + + +static void +cell_bind_sampler_states(struct pipe_context *pipe, +                         unsigned num, void **samplers) +{ +   struct cell_context *cell = cell_context(pipe); +   uint i, changed = 0x0; + +   assert(num <= CELL_MAX_SAMPLERS); + +   draw_flush(cell->draw); + +   for (i = 0; i < CELL_MAX_SAMPLERS; i++) { +      struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL; +      if (cell->sampler[i] != new_samp) { +         cell->sampler[i] = new_samp; +         changed |= (1 << i); +      } +   } + +   if (changed) { +      cell->dirty |= CELL_NEW_SAMPLER; +      cell->dirty_samplers |= changed; +   } +} + + +static void +cell_delete_sampler_state(struct pipe_context *pipe, +                              void *sampler) +{ +   FREE( sampler ); +} + + + +static void +cell_set_sampler_textures(struct pipe_context *pipe, +                          unsigned num, struct pipe_texture **texture) +{ +   struct cell_context *cell = cell_context(pipe); +   uint i, changed = 0x0; + +   assert(num <= CELL_MAX_SAMPLERS); + +   for (i = 0; i < CELL_MAX_SAMPLERS; i++) { +      struct pipe_texture *new_tex = i < num ? 
texture[i] : NULL; +      if ((struct pipe_texture *) cell->texture[i] != new_tex) { +         pipe_texture_reference((struct pipe_texture **) &cell->texture[i], +                                new_tex); +         changed |= (1 << i); +      } +   } + +   cell->num_textures = num; + +   if (changed) { +      cell->dirty |= CELL_NEW_TEXTURE; +      cell->dirty_textures |= changed; +   } +} + + + +static void +cell_set_framebuffer_state(struct pipe_context *pipe, +                           const struct pipe_framebuffer_state *fb) +{ +   struct cell_context *cell = cell_context(pipe); + +   if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { +      struct pipe_surface *csurf = fb->cbufs[0]; +      struct pipe_surface *zsurf = fb->zsbuf; +      uint i; +      uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE | +                    PIPE_BUFFER_USAGE_GPU_READ); + +      /* unmap old surfaces */ +      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { +         if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { +            pipe_surface_unmap(cell->framebuffer.cbufs[i]); +            cell->cbuf_map[i] = NULL; +         } +      } + +      if (cell->framebuffer.zsbuf && cell->zsbuf_map) { +         pipe_surface_unmap(cell->framebuffer.zsbuf); +         cell->zsbuf_map = NULL; +      } + +      /* Finish any pending rendering to the current surface before +       * installing a new surface! +       */ +      cell_flush_int(cell, CELL_FLUSH_WAIT); + +      /* update my state +       * (this is also where old surfaces will finally get freed) +       */ +      cell->framebuffer.width = fb->width; +      cell->framebuffer.height = fb->height; +      cell->framebuffer.nr_cbufs = fb->nr_cbufs; +      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { +         pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); +      } +      pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); + +      /* map new surfaces */ +      if (csurf) +         cell->cbuf_map[0] = pipe_surface_map(csurf, flags); + +      if (zsurf) +         cell->zsbuf_map = pipe_surface_map(zsurf, flags); + +      cell->dirty |= CELL_NEW_FRAMEBUFFER; +   } +} + + + +void +cell_init_state_functions(struct cell_context *cell) +{ +   cell->pipe.create_blend_state = cell_create_blend_state; +   cell->pipe.bind_blend_state   = cell_bind_blend_state; +   cell->pipe.delete_blend_state = cell_delete_blend_state; + +   cell->pipe.create_sampler_state = cell_create_sampler_state; +   cell->pipe.bind_sampler_states = cell_bind_sampler_states; +   cell->pipe.delete_sampler_state = cell_delete_sampler_state; + +   cell->pipe.set_sampler_textures = cell_set_sampler_textures; + +   cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; +   cell->pipe.bind_depth_stencil_alpha_state   = cell_bind_depth_stencil_alpha_state; +   cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; + +   cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; +   cell->pipe.bind_rasterizer_state   = cell_bind_rasterizer_state; +   cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; + +   cell->pipe.set_blend_color = cell_set_blend_color; +   cell->pipe.set_clip_state = cell_set_clip_state; + +   cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; + +   cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; +   cell->pipe.set_scissor_state = cell_set_scissor_state; +   cell->pipe.set_viewport_state = cell_set_viewport_state; +} diff --git 
a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h new file mode 100644 index 0000000000..1889bd52ff --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.h @@ -0,0 +1,39 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_PIPE_STATE_H +#define CELL_PIPE_STATE_H + + +struct cell_context; + +extern void +cell_init_state_functions(struct cell_context *cell); + + +#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c new file mode 100644 index 0000000000..79cb8df82f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -0,0 +1,211 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \brief  Last stage of 'draw' pipeline: send tris to SPUs. 
+ * \author  Brian Paul + */ + +#include "cell_context.h" +#include "cell_render.h" +#include "cell_spu.h" +#include "util/u_memory.h" +#include "draw/draw_private.h" + + +struct render_stage { +   struct draw_stage stage; /**< This must be first (base class) */ + +   struct cell_context *cell; +}; + + +static INLINE struct render_stage * +render_stage(struct draw_stage *stage) +{ +   return (struct render_stage *) stage; +} + + +static void render_begin( struct draw_stage *stage ) +{ +#if 0 +   struct render_stage *render = render_stage(stage); +   struct cell_context *sp = render->cell; +   const struct pipe_shader_state *fs = &render->cell->fs->shader; +   render->quad.nr_attrs = render->cell->nr_frag_attrs; + +   render->firstFpInput = fs->input_semantic_name[0]; + +   sp->quad.first->begin(sp->quad.first); +#endif +} + + +static void render_end( struct draw_stage *stage ) +{ +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +   struct render_stage *render = render_stage(stage); +   /*render->cell->line_stipple_counter = 0;*/ +} + + +static void +render_point(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +static void +render_line(struct draw_stage *stage, struct prim_header *prim) +{ +} + + +/** Write a vertex into the prim buffer */ +static void +save_vertex(struct cell_prim_buffer *buf, uint pos, +            const struct vertex_header *vert) +{ +   uint attr, j; + +   for (attr = 0; attr < 2; attr++) { +      for (j = 0; j < 4; j++) { +         buf->vertex[pos][attr][j] = vert->data[attr][j]; +      } +   } + +   /* update bounding box */ +   if (vert->data[0][0] < buf->xmin) +      buf->xmin = vert->data[0][0]; +   if (vert->data[0][0] > buf->xmax) +      buf->xmax = vert->data[0][0]; +   if (vert->data[0][1] < buf->ymin) +      buf->ymin = vert->data[0][1]; +   if (vert->data[0][1] > buf->ymax) +      buf->ymax = vert->data[0][1]; +} + + +static void +render_tri(struct draw_stage *stage, struct prim_header *prim) +{ +   struct render_stage *rs = render_stage(stage); +   struct cell_context *cell = rs->cell; +   struct cell_prim_buffer *buf = &cell->prim_buffer; +   uint i; + +   if (buf->num_verts + 3 > CELL_MAX_VERTS) { +      cell_flush_prim_buffer(cell); +   } + +   i = buf->num_verts; +   assert(i+2 <= CELL_MAX_VERTS); +   save_vertex(buf, i+0, prim->v[0]); +   save_vertex(buf, i+1, prim->v[1]); +   save_vertex(buf, i+2, prim->v[2]); +   buf->num_verts += 3; +} + + +/** + * Send the a RENDER command to all SPUs to have them render the prims + * in the current prim_buffer. 
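+ * (As the code below shows, the per-SPU parameters are written into
+ * cell_global.command[i].render; the mailbox message itself presumably just
+ * carries the CELL_CMD_RENDER opcode for the SPU to dispatch on.)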
+ */ +void +cell_flush_prim_buffer(struct cell_context *cell) +{ +   uint i; + +   if (cell->prim_buffer.num_verts == 0) +      return; + +   for (i = 0; i < cell->num_spus; i++) { +      struct cell_command_render *render = &cell_global.command[i].render; +      render->prim_type = PIPE_PRIM_TRIANGLES; +      render->num_verts = cell->prim_buffer.num_verts; +      render->front_winding = cell->rasterizer->front_winding; +      render->vertex_size = cell->vertex_info->size * 4; +      render->xmin = cell->prim_buffer.xmin; +      render->ymin = cell->prim_buffer.ymin; +      render->xmax = cell->prim_buffer.xmax; +      render->ymax = cell->prim_buffer.ymax; +      render->vertex_data = &cell->prim_buffer.vertex; +      ASSERT_ALIGN16(render->vertex_data); +      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); +   } + +   cell->prim_buffer.num_verts = 0; + +   cell->prim_buffer.xmin = 1e100; +   cell->prim_buffer.ymin = 1e100; +   cell->prim_buffer.xmax = -1e100; +   cell->prim_buffer.ymax = -1e100; + +   /* XXX temporary, need to double-buffer the prim buffer until we get +    * a real command buffer/list system. +    */ +   cell_flush(&cell->pipe, 0x0); +} + + + +static void render_destroy( struct draw_stage *stage ) +{ +   FREE( stage ); +} + + +/** + * Create a new draw/render stage.  This will be plugged into the + * draw module as the last pipeline stage. + */ +struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) +{ +   struct render_stage *render = CALLOC_STRUCT(render_stage); + +   render->cell = cell; +   render->stage.draw = cell->draw; +   render->stage.begin = render_begin; +   render->stage.point = render_point; +   render->stage.line = render_line; +   render->stage.tri = render_tri; +   render->stage.end = render_end; +   render->stage.reset_stipple_counter = reset_stipple_counter; +   render->stage.destroy = render_destroy; + +   /* +   render->quad.coef = render->coef; +   render->quad.posCoef = &render->posCoef; +   */ + +   return &render->stage; +} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h new file mode 100644 index 0000000000..826dcbafeb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.h @@ -0,0 +1,39 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef CELL_RENDER_H +#define CELL_RENDER_H + +struct cell_context; +struct draw_stage; + +extern void +cell_flush_prim_buffer(struct cell_context *cell); + +extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); + +#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c new file mode 100644 index 0000000000..512d85d352 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -0,0 +1,176 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_simple_screen.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "cell/common.h" +#include "cell_screen.h" +#include "cell_texture.h" +#include "cell_winsys.h" + + +static const char * +cell_get_vendor(struct pipe_screen *screen) +{ +   return "Tungsten Graphics, Inc."; +} + + +static const char * +cell_get_name(struct pipe_screen *screen) +{ +   return "Cell"; +} + + +static int +cell_get_param(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +      return CELL_MAX_SAMPLERS; +   case PIPE_CAP_NPOT_TEXTURES: +      return 1; +   case PIPE_CAP_TWO_SIDED_STENCIL: +      return 1; +   case PIPE_CAP_GLSL: +      return 1; +   case PIPE_CAP_S3TC: +      return 0; +   case PIPE_CAP_ANISOTROPIC_FILTER: +      return 0; +   case PIPE_CAP_POINT_SPRITE: +      return 1; +   case PIPE_CAP_MAX_RENDER_TARGETS: +      return 1; +   case PIPE_CAP_OCCLUSION_QUERY: +      return 1; +   case PIPE_CAP_TEXTURE_SHADOW_MAP: +      return 10; +   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +      return CELL_MAX_TEXTURE_LEVELS; +   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +      return 8;  /* max 128x128x128 */ +   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +      return CELL_MAX_TEXTURE_LEVELS; +   case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +      return 1; /* XXX not really true */ +   case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +      return 0; /* XXX to do */ +   default: +      return 0; +   } +} + + +static float +cell_get_paramf(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_LINE_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_LINE_WIDTH_AA: +      return 255.0; /* arbitrary */ + +   case PIPE_CAP_MAX_POINT_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_POINT_WIDTH_AA: +      return 255.0; /* arbitrary */ + +   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +      return 0.0; + +   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +      return 16.0; /* arbitrary */ + +   default: +      return 0; +   } +} + + +static boolean +cell_is_format_supported( struct pipe_screen *screen, +                          enum pipe_format format,  +                          enum pipe_texture_target target, +                          unsigned tex_usage,  +                          unsigned geom_flags ) +{ +   /* cell supports most formats, XXX for now anyway */ +   if (format == PIPE_FORMAT_DXT5_RGBA || +       format == PIPE_FORMAT_R8G8B8A8_SRGB) +      return FALSE; +   else +      return TRUE; +} + + +static void +cell_destroy_screen( struct pipe_screen *screen ) +{ +   struct pipe_winsys *winsys = screen->winsys; + +   if(winsys->destroy) +      winsys->destroy(winsys); + +   FREE(screen); +} + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no cell_screen) but + * that would be the place to put SPU thread/context info... 
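+ *
+ * The screen takes ownership of the winsys: cell_destroy_screen() above
+ * calls winsys->destroy() before freeing the screen itself.
+ *
+ * Rough usage sketch (the "ws" winsys pointer here is hypothetical):
+ *
+ *    struct pipe_screen *screen = cell_create_screen(ws);
+ *    ...
+ *    screen->destroy(screen);   (this also destroys "ws")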
+ */ +struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys) +{ +   struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + +   if (!screen) +      return NULL; + +   screen->winsys = winsys; + +   screen->destroy = cell_destroy_screen; + +   screen->get_name = cell_get_name; +   screen->get_vendor = cell_get_vendor; +   screen->get_param = cell_get_param; +   screen->get_paramf = cell_get_paramf; +   screen->is_format_supported = cell_is_format_supported; + +   cell_init_screen_texture_funcs(screen); +   u_simple_screen_init(screen); + +   return screen; +} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h new file mode 100644 index 0000000000..c7e15889d6 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.h @@ -0,0 +1,41 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_SCREEN_H +#define CELL_SCREEN_H + + +struct pipe_screen; +struct pipe_winsys; + + +extern struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys); + + +#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c new file mode 100644 index 0000000000..28e5e6d706 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -0,0 +1,219 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *  
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *  
+ **************************************************************************/
+
+
+/**
+ * Utility/wrappers for communicating with the SPUs.
+ */
+
+
+#include <pthread.h>
+
+#include "cell_spu.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+#include "cell/common.h"
+
+
+/*
+helpful headers:
+/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h
+*/
+
+
+/**
+ * Cell/SPU info that's not per-context.
+ */
+struct cell_global_info cell_global;
+
+
+/**
+ * Scan /proc/cpuinfo to determine the timebase for the system.
+ * This is used by the SPUs to convert 'decrementer' ticks to seconds.
+ * There may be a better way to get this value...
+ */
+static unsigned
+get_timebase(void)
+{
+   FILE *f = fopen("/proc/cpuinfo", "r");
+   unsigned timebase = 0;   /* default if no "timebase" line is found */
+
+   assert(f);
+   while (!feof(f)) {
+      char line[80];
+      if (!fgets(line, sizeof(line), f))
+         break;
+      if (strncmp(line, "timebase", 8) == 0) {
+         char *colon = strchr(line, ':');
+         if (colon) {
+            timebase = atoi(colon + 2);
+            break;
+         }
+      }
+   }
+   fclose(f);
+
+   return timebase;
+}
+
+
+/**
+ * Write a 1-word message to the given SPE mailbox.
+ */
+void
+send_mbox_message(spe_context_ptr_t ctx, unsigned int msg)
+{
+   spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING);
+}
+
+
+/**
+ * Wait for a 1-word message to arrive in given mailbox.
+ */
+uint
+wait_mbox_message(spe_context_ptr_t ctx)
+{
+   do {
+      unsigned data;
+      int count = spe_out_mbox_read(ctx, &data, 1);
+
+      if (count == 1) {
+	 return data;
+      }
+       
+      if (count < 0) {
+	 /* error */ ;
+      }
+   } while (1);
+}
+
+
+/**
+ * Called by pthread_create() to spawn an SPU thread.
+ */
+static void *
+cell_thread_function(void *arg)
+{
+   struct cell_init_info *init = (struct cell_init_info *) arg;
+   unsigned entry = SPE_DEFAULT_ENTRY;
+
+   ASSERT_ALIGN16(init);
+
+   if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0,
+                       init, NULL, NULL) < 0) {
+      fprintf(stderr, "spe_context_run() failed\n");
+      exit(1);
+   }
+
+   pthread_exit(NULL);
+}
+
+
+/**
+ * Create the SPU threads.  This is done once during driver initialization.
+ * This involves setting up the "init" message which is sent to each SPU.
+ * The init message specifies an SPU id, total number of SPUs, location
+ * and number of batch buffers, etc.
+ */
+void
+cell_start_spus(struct cell_context *cell)
+{
+   static boolean one_time_init = FALSE;
+   uint i, j;
+   uint timebase = get_timebase();
+
+   if (one_time_init) {
+      fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
+	      "on Cell.\n");
+      abort();
+   }
+
+   one_time_init = TRUE;
+
+   assert(cell->num_spus <= CELL_MAX_SPUS);
+
+   ASSERT_ALIGN16(&cell_global.inits[0]);
+   ASSERT_ALIGN16(&cell_global.inits[1]);
+
+   /*
+    * Initialize the global 'inits' structure for each SPU.
+    * A pointer to the init struct will be passed to each SPU.
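+    * (The ASSERT_ALIGN16 checks above matter for this: DMA transfers on
+    * the SPU side generally require 16-byte-aligned source data.)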
+    * The SPUs will then each grab their init info with mfc_get(). +    */ +   for (i = 0; i < cell->num_spus; i++) { +      cell_global.inits[i].id = i; +      cell_global.inits[i].num_spus = cell->num_spus; +      cell_global.inits[i].debug_flags = cell->debug_flags; +      cell_global.inits[i].inv_timebase = 1000.0f / timebase; + +      for (j = 0; j < CELL_NUM_BUFFERS; j++) { +         cell_global.inits[i].buffers[j] = cell->buffer[j]; +      } +      cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + +      cell_global.inits[i].spu_functions = &cell->spu_functions; + +      cell_global.spe_contexts[i] = spe_context_create(0, NULL); +      if (!cell_global.spe_contexts[i]) { +         fprintf(stderr, "spe_context_create() failed\n"); +         exit(1); +      } + +      if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { +         fprintf(stderr, "spe_program_load() failed\n"); +         exit(1); +      } +       +      pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ +                     NULL,                        /* pthread attribs */ +                     &cell_thread_function,       /* start routine */ +		     &cell_global.inits[i]);      /* thread argument */ +   } +} + + +/** + * Tell all the SPUs to stop/exit. + * This is done when the driver's exiting / cleaning up. + */ +void +cell_spu_exit(struct cell_context *cell) +{ +   uint i; + +   for (i = 0; i < cell->num_spus; i++) { +      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); +   } + +   /* wait for threads to exit */ +   for (i = 0; i < cell->num_spus; i++) { +      void *value; +      pthread_join(cell_global.spe_threads[i], &value); +      cell_global.spe_threads[i] = 0; +      cell_global.spe_contexts[i] = 0; +   } +} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h new file mode 100644 index 0000000000..c93958a9ed --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -0,0 +1,79 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#ifndef CELL_SPU +#define CELL_SPU + + +#include <libspe2.h> +#include <pthread.h> +#include "cell/common.h" + +#include "cell_context.h" + + +/** + * Global vars, for now anyway. + */ +struct cell_global_info +{ +   /** +    * SPU/SPE handles, etc +    */ +   spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; +   pthread_t spe_threads[CELL_MAX_SPUS]; + +   /** +    * Data sent to SPUs at start-up +    */ +   struct cell_init_info inits[CELL_MAX_SPUS]; +}; + + +extern struct cell_global_info cell_global; + + +/** This is the handle for the actual SPE code */ +extern spe_program_handle_t g3d_spu; + + +extern void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); + +extern uint +wait_mbox_message(spe_context_ptr_t ctx); + + +extern void +cell_start_spus(struct cell_context *cell); + + +extern void +cell_spu_exit(struct cell_context *cell); + + +#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h new file mode 100644 index 0000000000..b193170f9c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -0,0 +1,65 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#ifndef CELL_STATE_H +#define CELL_STATE_H + + +#define CELL_NEW_VIEWPORT      0x1 +#define CELL_NEW_RASTERIZER    0x2 +#define CELL_NEW_FS            0x4 +#define CELL_NEW_BLEND         0x8 +#define CELL_NEW_CLIP          0x10 +#define CELL_NEW_SCISSOR       0x20 +#define CELL_NEW_STIPPLE       0x40 +#define CELL_NEW_FRAMEBUFFER   0x80 +#define CELL_NEW_ALPHA_TEST    0x100 +#define CELL_NEW_DEPTH_STENCIL 0x200 +#define CELL_NEW_SAMPLER       0x400 +#define CELL_NEW_TEXTURE       0x800 +#define CELL_NEW_VERTEX        0x1000 +#define CELL_NEW_VS            0x2000 +#define CELL_NEW_VS_CONSTANTS  0x4000 +#define CELL_NEW_FS_CONSTANTS  0x8000 +#define CELL_NEW_VERTEX_INFO   0x10000 + + +extern void +cell_update_derived( struct cell_context *softpipe ); + + +extern void +cell_init_shader_functions(struct cell_context *cell); + + +extern void +cell_init_vertex_functions(struct cell_context *cell); + + +#endif /* CELL_STATE_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c new file mode 100644 index 0000000000..efc4f78364 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -0,0 +1,170 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_state.h" +#include "cell_state_emit.h" + + +/** + * Determine how to map vertex program outputs to fragment program inputs. + * Basically, this will be used when computing the triangle interpolation + * coefficients from the post-transform vertex attributes. + */ +static void +calculate_vertex_layout( struct cell_context *cell ) +{ +   const struct cell_fragment_shader_state *fs = cell->fs; +   const enum interp_mode colorInterp +      = cell->rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; +   struct vertex_info *vinfo = &cell->vertex_info; +   uint i; +   int src; + +#if 0 +   if (cell->vbuf) { +      /* if using the post-transform vertex buffer, tell draw_vbuf to +       * simply emit the whole post-xform vertex as-is: +       */ +      struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; +      vinfo_vbuf->num_attribs = 0; +      draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); +      vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; +   } +#endif + +   /* reset vinfo */ +   vinfo->num_attribs = 0; + +   /* we always want to emit vertex pos */ +   src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); +   assert(src >= 0); +   draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + + +   /* +    * Loop over fragment shader inputs, searching for the matching output +    * from the vertex shader. +    */ +   for (i = 0; i < fs->info.num_inputs; i++) { +      switch (fs->info.input_semantic_name[i]) { +      case TGSI_SEMANTIC_POSITION: +         /* already done above */ +         break; + +      case TGSI_SEMANTIC_COLOR: +         src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR,  +                                   fs->info.input_semantic_index[i]); +         assert(src >= 0); +         draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); +         break; + +      case TGSI_SEMANTIC_FOG: +         src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); +#if 1 +         if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ +            src = 0; +#endif +         assert(src >= 0); +         draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); +         break; + +      case TGSI_SEMANTIC_GENERIC: +         /* this includes texcoords and varying vars */ +         src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, +                              fs->info.input_semantic_index[i]); +         assert(src >= 0); +         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +         break; + +      default: +         assert(0); +      } +   } + +   draw_compute_vertex_size(vinfo); + +   /* XXX only signal this if format really changes */ +   cell->dirty |= CELL_NEW_VERTEX_INFO; +} + + +#if 0 +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct cell_context *sp) +{ +   uint surfWidth = sp->framebuffer.width; +   uint surfHeight = sp->framebuffer.height; + +   if (sp->rasterizer->scissor) { +      /* clip to scissor rect */ +      sp->cliprect.minx = MAX2(sp->scissor.minx, 0); +      sp->cliprect.miny = MAX2(sp->scissor.miny, 0); +      sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); +      sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); +   } +   else { +      /* clip to surface bounds */ +      sp->cliprect.minx = 0; +      sp->cliprect.miny = 0; +      sp->cliprect.maxx = surfWidth; +      sp->cliprect.maxy = surfHeight; +   } +} +#endif + + + +/** + * Update derived state, send current state to SPUs prior to rendering. 
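+ *
+ * Concretely, this recomputes the vertex layout when the rasterizer,
+ * fragment shader or vertex shader changed, emits whatever is flagged in
+ * cell->dirty via cell_emit_state(), and then clears the dirty bits.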
+ */ +void cell_update_derived( struct cell_context *cell ) +{ +   if (cell->dirty & (CELL_NEW_RASTERIZER | +                      CELL_NEW_FS | +                      CELL_NEW_VS)) +      calculate_vertex_layout( cell ); + +#if 0 +   if (cell->dirty & (CELL_NEW_SCISSOR | +                      CELL_NEW_DEPTH_STENCIL_ALPHA | +                      CELL_NEW_FRAMEBUFFER)) +      compute_cliprect(cell); +#endif + +   cell_emit_state(cell); + +   cell->dirty = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c new file mode 100644 index 0000000000..ff529fe22c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -0,0 +1,340 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" +#include "cell_state.h" +#include "cell_state_emit.h" +#include "cell_batch.h" +#include "cell_texture.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" + + +/** + * Find/create a cell_command_fragment_ops object corresponding to the + * current blend/stencil/z/colormask/etc. state. + */ +static struct cell_command_fragment_ops * +lookup_fragment_ops(struct cell_context *cell) +{ +   struct cell_fragment_ops_key key; +   struct cell_command_fragment_ops *ops; + +   /* +    * Build key +    */ +   memset(&key, 0, sizeof(key)); +   key.blend = *cell->blend; +   key.blend_color = cell->blend_color; +   key.dsa = *cell->depth_stencil; + +   if (cell->framebuffer.cbufs[0]) +      key.color_format = cell->framebuffer.cbufs[0]->format; +   else +      key.color_format = PIPE_FORMAT_NONE; + +   if (cell->framebuffer.zsbuf) +      key.zs_format = cell->framebuffer.zsbuf->format; +   else +      key.zs_format = PIPE_FORMAT_NONE; + +   /* +    * Look up key in cache. +    */ +   ops = (struct cell_command_fragment_ops *) +      util_keymap_lookup(cell->fragment_ops_cache, &key); + +   /* +    * If not found, create/save new fragment ops command. 
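+    * Generating the SPE code is comparatively expensive, so the result is
+    * cached in cell->fragment_ops_cache, keyed on the blend, blend-color,
+    * depth/stencil/alpha state and the color/Z buffer formats built above.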
+    */ +   if (!ops) { +      struct spe_function spe_code_front, spe_code_back; +      unsigned int facing_dependent, total_code_size; + +      if (0) +         debug_printf("**** Create New Fragment Ops\n"); + +      /* Prepare the buffer that will hold the generated code.  The +       * "0" passed in for the size means that the SPE code will +       * use a default size. +       */ +      spe_init_func(&spe_code_front, 0); +      spe_init_func(&spe_code_back, 0); + +      /* Generate new code.  Always generate new code for both front-facing +       * and back-facing fragments, even if it's the same code in both +       * cases. +       */ +      cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); +      cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); + +      /* Make sure the code is a multiple of 8 bytes long; this is +       * required to ensure that the dual pipe instruction alignment +       * is correct.  It's also important for the SPU unpacking, +       * which assumes 8-byte boundaries. +       */ +      unsigned int front_code_size = spe_code_size(&spe_code_front); +      while (front_code_size % 8 != 0) { +         spe_lnop(&spe_code_front); +         front_code_size = spe_code_size(&spe_code_front); +      } +      unsigned int back_code_size = spe_code_size(&spe_code_back); +      while (back_code_size % 8 != 0) { +         spe_lnop(&spe_code_back); +         back_code_size = spe_code_size(&spe_code_back); +      } + +      /* Determine whether the code we generated is facing-dependent, by +       * determining whether the generated code is different for the front- +       * and back-facing fragments. +       */ +      if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { +         /* Code is identical; only need one copy. */ +         facing_dependent = 0; +         total_code_size = front_code_size; +      } +      else { +         /* Code is different for front-facing and back-facing fragments. +          * Need to send both copies. +          */ +         facing_dependent = 1; +         total_code_size = front_code_size + back_code_size; +      } + +      /* alloc new fragment ops command.  Note that this structure +       * has variant length based on the total code size required. +       */ +      ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); +      /* populate the new cell_command_fragment_ops object */ +      ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS; +      ops->total_code_size = total_code_size; +      ops->front_code_index = 0; +      memcpy(ops->code, spe_code_front.store, front_code_size); +      if (facing_dependent) { +        /* We have separate front- and back-facing code.  Append the +         * back-facing code to the buffer.  Be careful because the code +         * size is in bytes, but the buffer is of unsigned elements. +         */ +        ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); +        memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); +      } +      else { +        /* Use the same code for front- and back-facing fragments */ +        ops->back_code_index = ops->front_code_index; +      } + +      /* Set the fields for the fallback case.  Note that these fields +       * (and the whole fallback case) will eventually go away. 
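+       * (Presumably the SPU falls back to these raw state structs whenever
+       * it cannot, or is asked not to, use the generated code.)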
+       */ +      ops->dsa = *cell->depth_stencil; +      ops->blend = *cell->blend; +      ops->blend_color = cell->blend_color; + +      /* insert cell_command_fragment_ops object into keymap/cache */ +      util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); + +      /* release rtasm buffer */ +      spe_release_func(&spe_code_front); +      spe_release_func(&spe_code_back); +   } +   else { +      if (0) +         debug_printf("**** Re-use Fragment Ops\n"); +   } + +   return ops; +} + + + +static void +emit_state_cmd(struct cell_context *cell, uint cmd, +               const void *state, uint state_size) +{ +   uint32_t *dst = (uint32_t *)  +       cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size)); +   *dst = cmd; +   memcpy(dst + 4, state, state_size); +} + + +/** + * For state marked as 'dirty', construct a state-update command block + * and insert it into the current batch buffer. + */ +void +cell_emit_state(struct cell_context *cell) +{ +   if (cell->dirty & CELL_NEW_FRAMEBUFFER) { +      struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; +      struct pipe_surface *zbuf = cell->framebuffer.zsbuf; +      STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0); +      struct cell_command_framebuffer *fb +         = cell_batch_alloc16(cell, sizeof(*fb)); +      fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER; +      fb->color_start = cell->cbuf_map[0]; +      fb->color_format = cbuf->format; +      fb->depth_start = cell->zsbuf_map; +      fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; +      fb->width = cell->framebuffer.width; +      fb->height = cell->framebuffer.height; +#if 0 +      printf("EMIT color format %s\n", pf_name(fb->color_format)); +      printf("EMIT depth format %s\n", pf_name(fb->depth_format)); +#endif +   } + +   if (cell->dirty & (CELL_NEW_RASTERIZER)) { +      STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0); +      struct cell_command_rasterizer *rast = +         cell_batch_alloc16(cell, sizeof(*rast)); +      rast->opcode[0] = CELL_CMD_STATE_RASTERIZER; +      rast->rasterizer = *cell->rasterizer; +   } + +   if (cell->dirty & (CELL_NEW_FS)) { +      /* Send new fragment program to SPUs */ +      STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0); +      struct cell_command_fragment_program *fp +            = cell_batch_alloc16(cell, sizeof(*fp)); +      fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM; +      fp->num_inst = cell->fs->code.num_inst; +      memcpy(&fp->code, cell->fs->code.store, +             SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); +      if (0) { +         int i; +         printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); +         for (i = 0; i < fp->num_inst; i++) { +            printf(" %3d: 0x%08x\n", i, fp->code[i]); +         } +      } +   } + +   if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { +      const uint shader = PIPE_SHADER_FRAGMENT; +      const uint num_const = cell->constants[shader].buffer->size / sizeof(float); +      uint i, j; +      float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); +      uint32_t *ibuf = (uint32_t *) buf; +      const float *constants = pipe_buffer_map(cell->pipe.screen, +                                               cell->constants[shader].buffer, +                                               PIPE_BUFFER_USAGE_CPU_READ); +      ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; +      ibuf[4] = num_const; +      j = 8; +      for (i = 0; i < num_const; i++) { +         buf[j++] = 
constants[i]; +      } +      pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); +   } + +   if (cell->dirty & (CELL_NEW_FRAMEBUFFER | +                      CELL_NEW_DEPTH_STENCIL | +                      CELL_NEW_BLEND)) { +      struct cell_command_fragment_ops *fops, *fops_cmd; +      /* Note that cell_command_fragment_ops is a variant-sized record */ +      fops = lookup_fragment_ops(cell); +      fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size)); +      memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); +   } + +   if (cell->dirty & CELL_NEW_SAMPLER) { +      uint i; +      for (i = 0; i < CELL_MAX_SAMPLERS; i++) { +         if (cell->dirty_samplers & (1 << i)) { +            if (cell->sampler[i]) { +               STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0); +               struct cell_command_sampler *sampler +                  = cell_batch_alloc16(cell, sizeof(*sampler)); +               sampler->opcode[0] = CELL_CMD_STATE_SAMPLER; +               sampler->unit = i; +               sampler->state = *cell->sampler[i]; +            } +         } +      } +      cell->dirty_samplers = 0x0; +   } + +   if (cell->dirty & CELL_NEW_TEXTURE) { +      uint i; +      for (i = 0;i < CELL_MAX_SAMPLERS; i++) { +         if (cell->dirty_textures & (1 << i)) { +            STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0); +            struct cell_command_texture *texture +               =  (struct cell_command_texture *)cell_batch_alloc16(cell, sizeof(*texture)); +            texture->opcode[0] = CELL_CMD_STATE_TEXTURE; +            texture->unit = i; +            if (cell->texture[i]) { +               uint level; +               for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { +                  texture->start[level] = cell->texture[i]->tiled_mapped[level]; +                  texture->width[level] = cell->texture[i]->base.width[level]; +                  texture->height[level] = cell->texture[i]->base.height[level]; +                  texture->depth[level] = cell->texture[i]->base.depth[level]; +               } +               texture->target = cell->texture[i]->base.target; +            } +            else { +               uint level; +               for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { +                  texture->start[level] = NULL; +                  texture->width[level] = 0; +                  texture->height[level] = 0; +                  texture->depth[level] = 0; +               } +               texture->target = 0; +            } +         } +      } +      cell->dirty_textures = 0x0; +   } + +   if (cell->dirty & CELL_NEW_VERTEX_INFO) { +      emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, +                     &cell->vertex_info, sizeof(struct vertex_info)); +   } + +#if 0 +   if (cell->dirty & CELL_NEW_VS) { +      const struct draw_context *const draw = cell->draw; +      struct cell_shader_info info; + +      info.num_outputs = draw_num_vs_outputs(draw); +      info.declarations = (uintptr_t) draw->vs.machine.Declarations; +      info.num_declarations = draw->vs.machine.NumDeclarations; +      info.instructions = (uintptr_t) draw->vs.machine.Instructions; +      info.num_instructions = draw->vs.machine.NumInstructions; +      info.immediates = (uintptr_t) draw->vs.machine.Imms; +      info.num_immediates = draw->vs.machine.ImmLimit / 4; + +      emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); +   } +#endif +} diff --git 
a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h
new file mode 100644
index 0000000000..59f8affe8d
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.h
@@ -0,0 +1,36 @@
+/**************************************************************************
+ *  
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *  
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *  
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *  
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *  
+ **************************************************************************/
+
+#ifndef CELL_STATE_EMIT_H
+#define CELL_STATE_EMIT_H
+
+
+extern void
+cell_emit_state(struct cell_context *cell);
+
+
+#endif /* CELL_STATE_EMIT_H */
diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
new file mode 100644
index 0000000000..d97c22b2ef
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c
@@ -0,0 +1,1430 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file
+ * Generate code to perform all per-fragment operations.
+ *
+ * Code generated by these functions performs alpha, depth, and stencil
+ * testing as well as alpha blending.
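+ *
+ * The code is emitted for the SPE through the rtasm_ppc_spe run-time
+ * assembler (the spe_* calls below) and operates on four fragments at a
+ * time.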
+ * + * \note + * Occlusion query is not supported, but this is the right place to add that + * support. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "cell_context.h" + +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Generate code to perform alpha testing. + * + * The code generated by this function uses the register specificed by + * \c mask as both an input and an output. + * + * \param dsa    Current alpha-test state + * \param f      Function to which code should be appended + * \param mask   Index of register containing active fragment mask + * \param alphas Index of register containing per-fragment alpha values + * + * \note Emits a maximum of 6 instructions. + */ +static void +emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, +                struct spe_function *f, int mask, int alphas) +{ +   /* If the alpha function is either NEVER or ALWAYS, there is no need to +    * load the reference value into a register.  ALWAYS is a fairly common +    * case, and this optimization saves 2 instructions. +    */ +   if (dsa->alpha.enabled +       && (dsa->alpha.func != PIPE_FUNC_NEVER) +       && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { +      int ref = spe_allocate_available_register(f); +      int tmp_a = spe_allocate_available_register(f); +      int tmp_b = spe_allocate_available_register(f); +      union { +         float f; +         unsigned u; +      } ref_val; +      boolean complement = FALSE; + +      ref_val.f = dsa->alpha.ref; + +      spe_il(f, ref, ref_val.u & 0x0000ffff); +      spe_ilh(f, ref, ref_val.u >> 16); + +      switch (dsa->alpha.func) { +      case PIPE_FUNC_NOTEQUAL: +         complement = TRUE; +         /* FALLTHROUGH */ + +      case PIPE_FUNC_EQUAL: +         spe_fceq(f, tmp_a, ref, alphas); +         break; + +      case PIPE_FUNC_LEQUAL: +         complement = TRUE; +         /* FALLTHROUGH */ + +      case PIPE_FUNC_GREATER: +         spe_fcgt(f, tmp_a, ref, alphas); +         break; + +      case PIPE_FUNC_LESS: +         complement = TRUE; +         /* FALLTHROUGH */ + +      case PIPE_FUNC_GEQUAL: +         spe_fcgt(f, tmp_a, ref, alphas); +         spe_fceq(f, tmp_b, ref, alphas); +         spe_or(f, tmp_a, tmp_b, tmp_a); +         break; + +      case PIPE_FUNC_ALWAYS: +      case PIPE_FUNC_NEVER: +      default: +         assert(0); +         break; +      } + +      if (complement) { +         spe_andc(f, mask, mask, tmp_a); +      } else { +         spe_and(f, mask, mask, tmp_a); +      } + +      spe_release_register(f, ref); +      spe_release_register(f, tmp_a); +      spe_release_register(f, tmp_b); +   } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { +      spe_il(f, mask, 0); +   } +} + + +/** + * Generate code to perform Z testing.  Four Z values are tested at once. + * \param dsa        Current depth-test state + * \param f          Function to which code should be appended + * \param mask       Index of register to contain depth-pass mask + * \param stored     Index of register containing values from depth buffer + * \param calculated Index of register containing per-fragment depth values + * + * \return + * If the calculated depth comparison mask is the actual mask, \c FALSE is + * returned.  If the calculated depth comparison mask is the compliment of + * the actual mask, \c TRUE is returned. + * + * \note Emits a maximum of 3 instructions. 
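+ *
+ * Returning the complement instead of inverting the mask costs nothing
+ * here: the caller simply combines the result with andc rather than and.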
+ */ +static boolean +emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, +                struct spe_function *f, int mask, int stored, int calculated) +{ +   unsigned func = (dsa->depth.enabled) +       ? dsa->depth.func : PIPE_FUNC_ALWAYS; +   int tmp = spe_allocate_available_register(f); +   boolean compliment = FALSE; + +   switch (func) { +   case PIPE_FUNC_NEVER: +      spe_il(f, mask, 0); +      break; + +   case PIPE_FUNC_NOTEQUAL: +      compliment = TRUE; +      /* FALLTHROUGH */ +   case PIPE_FUNC_EQUAL: +      spe_ceq(f, mask, calculated, stored); +      break; + +   case PIPE_FUNC_LEQUAL: +      compliment = TRUE; +      /* FALLTHROUGH */ +   case PIPE_FUNC_GREATER: +      spe_clgt(f, mask, calculated, stored); +      break; + +   case PIPE_FUNC_LESS: +      compliment = TRUE; +      /* FALLTHROUGH */ +   case PIPE_FUNC_GEQUAL: +      spe_clgt(f, mask, calculated, stored); +      spe_ceq(f, tmp, calculated, stored); +      spe_or(f, mask, mask, tmp); +      break; + +   case PIPE_FUNC_ALWAYS: +      spe_il(f, mask, ~0); +      break; + +   default: +      assert(0); +      break; +   } + +   spe_release_register(f, tmp); +   return compliment; +} + + +/** + * Generate code to apply the stencil operation (after testing). + * \note Emits a maximum of 5 instructions. + * + * \warning + * Since \c out and \c in might be the same register, this routine cannot + * generate code that uses \c out as a temporary. + */ +static void +emit_stencil_op(struct spe_function *f, +                int out, int in, int mask, unsigned op, unsigned ref) +{ +   const int clamp = spe_allocate_available_register(f); +   const int clamp_mask = spe_allocate_available_register(f); +   const int result = spe_allocate_available_register(f); + +   switch(op) { +   case PIPE_STENCIL_OP_KEEP: +      assert(0); +   case PIPE_STENCIL_OP_ZERO: +      spe_il(f, result, 0); +      break; +   case PIPE_STENCIL_OP_REPLACE: +      spe_il(f, result, ref); +      break; +   case PIPE_STENCIL_OP_INCR: +      /* clamp = [0xff, 0xff, 0xff, 0xff] */ +      spe_il(f, clamp, 0x0ff); +      /* result[i] = in[i] + 1 */ +      spe_ai(f, result, in, 1); +      /* clamp_mask[i] = (result[i] > 0xff) */ +      spe_clgti(f, clamp_mask, result, 0x0ff); +      /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ +      spe_selb(f, result, result, clamp, clamp_mask); +      break; +   case PIPE_STENCIL_OP_DECR: +      spe_il(f, clamp, 0); +      spe_ai(f, result, in, -1); + +      /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned +       * arithmetic. +       */ +      spe_clgti(f, clamp_mask, result, 0x0ff); +      spe_selb(f, result, result, clamp, clamp_mask); +      break; +   case PIPE_STENCIL_OP_INCR_WRAP: +      spe_ai(f, result, in, 1); +      break; +   case PIPE_STENCIL_OP_DECR_WRAP: +      spe_ai(f, result, in, -1); +      break; +   case PIPE_STENCIL_OP_INVERT: +      spe_nor(f, result, in, in); +      break; +   default: +      assert(0); +   } + +   spe_selb(f, out, in, result, mask); + +   spe_release_register(f, result); +   spe_release_register(f, clamp_mask); +   spe_release_register(f, clamp); +} + + +/** + * Generate code to do stencil test.  Four pixels are tested at once. 
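+ * The register holding the resulting stencil values is returned; it may
+ * simply be \c stencil itself when the stored values did not need to be
+ * updated.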
+ * \param dsa        Depth / stencil test state + * \param face       0 for front face, 1 for back face + * \param f          Function to append instructions to + * \param mask       Register containing mask of fragments passing the + *                   alpha test + * \param depth_mask Register containing mask of fragments passing the + *                   depth test + * \param depth_compliment  Is \c depth_mask the compliment of the actual mask? + * \param stencil    Register containing values from stencil buffer + * \param depth_pass Register to store mask of fragments passing stencil test + *                   and depth test + * + * \note + * Emits a maximum of 10 + (3 * 5) = 25 instructions. + */ +static int +emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, +                  unsigned face, +                  struct spe_function *f, +                  int mask, +                  int depth_mask, +                  boolean depth_complement, +                  int stencil, +                  int depth_pass) +{ +   int stencil_fail = spe_allocate_available_register(f); +   int depth_fail = spe_allocate_available_register(f); +   int stencil_mask = spe_allocate_available_register(f); +   int stencil_pass = spe_allocate_available_register(f); +   int face_stencil = spe_allocate_available_register(f); +   int stencil_src = stencil; +   const unsigned ref = (dsa->stencil[face].ref_value +                         & dsa->stencil[face].valuemask); +   boolean complement = FALSE; +   int stored; +   int tmp = spe_allocate_available_register(f); + + +   if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) +       && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) +       && (dsa->stencil[face].valuemask != 0x0ff)) { +      stored = spe_allocate_available_register(f); +      spe_andi(f, stored, stencil, dsa->stencil[face].valuemask); +   } else { +      stored = stencil; +   } + + +   switch (dsa->stencil[face].func) { +   case PIPE_FUNC_NEVER: +      spe_il(f, stencil_mask, 0);   /* stencil_mask[0..3] = [0,0,0,0] */ +      break; + +   case PIPE_FUNC_NOTEQUAL: +      complement = TRUE; +      /* FALLTHROUGH */ +   case PIPE_FUNC_EQUAL: +      /* stencil_mask[i] = (stored[i] == ref) */ +      spe_ceqi(f, stencil_mask, stored, ref); +      break; + +   case PIPE_FUNC_LEQUAL: +      complement = TRUE; +      /* FALLTHROUGH */ +   case PIPE_FUNC_GREATER: +      complement = TRUE; +      /* stencil_mask[i] = (stored[i] > ref) */ +      spe_clgti(f, stencil_mask, stored, ref); +      break; + +   case PIPE_FUNC_LESS: +      complement = TRUE; +      /* FALLTHROUGH */ +   case PIPE_FUNC_GEQUAL: +      /* stencil_mask[i] = (stored[i] > ref) */ +      spe_clgti(f, stencil_mask, stored, ref); +      /* tmp[i] = (stored[i] == ref) */ +      spe_ceqi(f, tmp, stored, ref); +      /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ +      spe_or(f, stencil_mask, stencil_mask, tmp); +      break; + +   case PIPE_FUNC_ALWAYS: +      /* See comment below. */ +      break; + +   default: +      assert(0); +      break; +   } + +   if (stored != stencil) { +      spe_release_register(f, stored); +   } +   spe_release_register(f, tmp); + + +   /* ALWAYS is a very common stencil-test, so some effort is applied to +    * optimize that case.  The stencil-pass mask is the same as the input +    * fragment mask.  This makes the stencil-test (above) a no-op, and the +    * input fragment mask can be "renamed" the stencil-pass mask. 
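+    * (Hence stencil_pass is released right away in that case, and the
+    * cleanup at the end only releases it when it is not aliased to "mask".)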
+    */ +   if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { +      spe_release_register(f, stencil_pass); +      stencil_pass = mask; +   } else { +      if (complement) { +         spe_andc(f, stencil_pass, mask, stencil_mask); +      } else { +         spe_and(f, stencil_pass, mask, stencil_mask); +      } +   } + +   if (depth_complement) { +      spe_andc(f, depth_pass, stencil_pass, depth_mask); +   } else { +      spe_and(f, depth_pass, stencil_pass, depth_mask); +   } + + +   /* Conditionally emit code to update the stencil value under various +    * condititons.  Note that there is no need to generate code under the +    * following circumstances: +    * +    * - Stencil write mask is zero. +    * - For stencil-fail if the stencil test is ALWAYS +    * - For depth-fail if the stencil test is NEVER +    * - For depth-pass if the stencil test is NEVER +    * - Any of the 3 conditions if the operation is KEEP +    */ +   if (dsa->stencil[face].writemask != 0) { +      if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) +          && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { +         if (complement) { +            spe_and(f, stencil_fail, mask, stencil_mask); +         } else { +            spe_andc(f, stencil_fail, mask, stencil_mask); +         } + +         emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, +                         dsa->stencil[face].fail_op, +                         dsa->stencil[face].ref_value); + +         stencil_src = face_stencil; +      } + +      if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) +          && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { +         if (depth_complement) { +            spe_and(f, depth_fail, stencil_pass, depth_mask); +         } else { +            spe_andc(f, depth_fail, stencil_pass, depth_mask); +         } + +         emit_stencil_op(f, face_stencil, stencil_src, depth_fail, +                         dsa->stencil[face].zfail_op, +                         dsa->stencil[face].ref_value); +         stencil_src = face_stencil; +      } + +      if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) +          && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { +         emit_stencil_op(f, face_stencil, stencil_src, depth_pass, +                         dsa->stencil[face].zpass_op, +                         dsa->stencil[face].ref_value); +         stencil_src = face_stencil; +      } +   } + +   spe_release_register(f, stencil_fail); +   spe_release_register(f, depth_fail); +   spe_release_register(f, stencil_mask); +   if (stencil_pass != mask) { +      spe_release_register(f, stencil_pass); +   } + +   /* If all of the stencil operations were KEEP or the stencil write mask was +    * zero, "stencil_src" will still be set to "stencil".  In this case +    * release the "face_stencil" register.  Otherwise apply the stencil write +    * mask to select bits from the calculated stencil value and the previous +    * stencil value. 
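+    * (spe_selb() is a bit-wise select: result bits are taken from the new
+    * value where the write-mask bits are set and from the old value
+    * elsewhere.)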
+    */ +   if (stencil_src == stencil) { +      spe_release_register(f, face_stencil); +   } else if (dsa->stencil[face].writemask != 0x0ff) { +      int tmp = spe_allocate_available_register(f); + +      spe_il(f, tmp, dsa->stencil[face].writemask); +      spe_selb(f, stencil_src, stencil, stencil_src, tmp); + +      spe_release_register(f, tmp); +   } + +   return stencil_src; +} + + +void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) +{ +   struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; +   struct spe_function *const f = &cdsa->code; + +   /* This code generates a maximum of 6 (alpha test) + 3 (depth test) +    * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions.  Round +    * up to 64 to make it a happy power-of-two. +    */ +   spe_init_func(f, SPE_INST_SIZE * 64); + + +   /* Allocate registers for the function's input parameters.  Cleverly (and +    * clever code is usually dangerous, but I couldn't resist) the generated +    * function returns a structure.  Returned structures start with register +    * 3, and the structure fields are ordered to match up exactly with the +    * input parameters. +    */ +   int mask = spe_allocate_register(f, 3); +   int depth = spe_allocate_register(f, 4); +   int stencil = spe_allocate_register(f, 5); +   int zvals = spe_allocate_register(f, 6); +   int frag_a = spe_allocate_register(f, 7); +   int facing = spe_allocate_register(f, 8); + +   int depth_mask = spe_allocate_available_register(f); + +   boolean depth_complement; + + +   emit_alpha_test(dsa, f, mask, frag_a); + +   depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); + +   if (dsa->stencil[0].enabled) { +      const int front_depth_pass = spe_allocate_available_register(f); +      int front_stencil = emit_stencil_test(dsa, 0, f, mask, +                                            depth_mask, depth_complement, +                                            stencil, front_depth_pass); + +      if (dsa->stencil[1].enabled) { +         const int back_depth_pass = spe_allocate_available_register(f); +         int back_stencil = emit_stencil_test(dsa, 1, f, mask, +                                              depth_mask,  depth_complement, +                                              stencil, back_depth_pass); + +         /* If the front facing stencil value and the back facing stencil +          * value are stored in the same register, there is no need to select +          * a value based on the facing.  This can happen if the stencil value +          * was not modified due to the write masks being zero, the stencil +          * operations being KEEP, etc. 
+          */ +         if (front_stencil != back_stencil) { +            spe_selb(f, stencil, back_stencil, front_stencil, facing); +         } + +         if (back_stencil != stencil) { +            spe_release_register(f, back_stencil); +         } + +         if (front_stencil != stencil) { +            spe_release_register(f, front_stencil); +         } + +         spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); + +         spe_release_register(f, back_depth_pass); +      } else { +         if (front_stencil != stencil) { +            spe_or(f, stencil, front_stencil, front_stencil); +            spe_release_register(f, front_stencil); +         } +         spe_or(f, mask, front_depth_pass, front_depth_pass); +      } + +      spe_release_register(f, front_depth_pass); +   } else if (dsa->depth.enabled) { +      if (depth_complement) { +         spe_andc(f, mask, mask, depth_mask); +      } else { +         spe_and(f, mask, mask, depth_mask); +      } +   } + +   if (dsa->depth.writemask) { +         spe_selb(f, depth, depth, zvals, mask); +   } + +   spe_bi(f, 0, 0, 0);  /* return from function call */ + + +#if 0 +   { +      const uint32_t *p = f->store; +      unsigned i; + +      printf("# alpha (%sabled)\n", +             (dsa->alpha.enabled) ? "en" : "dis"); +      printf("#    func: %u\n", dsa->alpha.func); +      printf("#    ref: %.2f\n", dsa->alpha.ref); + +      printf("# depth (%sabled)\n", +             (dsa->depth.enabled) ? "en" : "dis"); +      printf("#    func: %u\n", dsa->depth.func); + +      for (i = 0; i < 2; i++) { +         printf("# %s stencil (%sabled)\n", +                (i == 0) ? "front" : "back", +                (dsa->stencil[i].enabled) ? "en" : "dis"); + +         printf("#    func: %u\n", dsa->stencil[i].func); +         printf("#    op (sf, zf, zp): %u %u %u\n", +                dsa->stencil[i].fail_op, +                dsa->stencil[i].zfail_op, +                dsa->stencil[i].zpass_op); +         printf("#    ref value / value mask / write mask: %02x %02x %02x\n", +                dsa->stencil[i].ref_value, +                dsa->stencil[i].valuemask, +                dsa->stencil[i].writemask); +      } + +      printf("\t.text\n"); +      for (/* empty */; p < f->csr; p++) { +         printf("\t.long\t0x%04x\n", *p); +      } +      fflush(stdout); +   } +#endif +} + + +/** + * \note Emits a maximum of 3 instructions + */ +static int +emit_alpha_factor_calculation(struct spe_function *f, +                              unsigned factor, +                              int src_alpha, int dst_alpha, int const_alpha) +{ +   int factor_reg; +   int tmp; + + +   switch (factor) { +   case PIPE_BLENDFACTOR_ONE: +      factor_reg = -1; +      break; + +   case PIPE_BLENDFACTOR_SRC_ALPHA: +      factor_reg = spe_allocate_available_register(f); + +      spe_or(f, factor_reg, src_alpha, src_alpha); +      break; + +   case PIPE_BLENDFACTOR_DST_ALPHA: +      factor_reg = dst_alpha; +      break; + +   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +      factor_reg = -1; +      break; + +   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +      factor_reg = spe_allocate_available_register(f); + +      tmp = spe_allocate_available_register(f); +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); +      spe_fs(f, factor_reg, tmp, const_alpha); +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_CONST_ALPHA: +      factor_reg = const_alpha; +      break; + +   case PIPE_BLENDFACTOR_ZERO: +      factor_reg = -1; +      break; + +  
 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +      tmp = spe_allocate_available_register(f); +      factor_reg = spe_allocate_available_register(f); + +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); +      spe_fs(f, factor_reg, tmp, src_alpha); + +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: +      tmp = spe_allocate_available_register(f); +      factor_reg = spe_allocate_available_register(f); + +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); +      spe_fs(f, factor_reg, tmp, dst_alpha); + +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_SRC1_ALPHA: +   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: +   default: +      assert(0); +      factor_reg = -1; +      break; +   } + +   return factor_reg; +} + + +/** + * \note Emits a maximum of 6 instructions + */ +static void +emit_color_factor_calculation(struct spe_function *f, +                              unsigned sF, unsigned mask, +                              const int *src, +                              const int *dst, +                              const int *const_color, +                              int *factor) +{ +   int tmp; +   unsigned i; + + +   factor[0] = -1; +   factor[1] = -1; +   factor[2] = -1; +   factor[3] = -1; + +   switch (sF) { +   case PIPE_BLENDFACTOR_ONE: +      break; + +   case PIPE_BLENDFACTOR_SRC_COLOR: +      for (i = 0; i < 3; ++i) { +         if ((mask & (1U << i)) != 0) { +            factor[i] = spe_allocate_available_register(f); +            spe_or(f, factor[i], src[i], src[i]); +         } +      } +      break; + +   case PIPE_BLENDFACTOR_SRC_ALPHA: +      factor[0] = spe_allocate_available_register(f); +      factor[1] = factor[0]; +      factor[2] = factor[0]; + +      spe_or(f, factor[0], src[3], src[3]); +      break; + +   case PIPE_BLENDFACTOR_DST_ALPHA: +      factor[0] = dst[3]; +      factor[1] = dst[3]; +      factor[2] = dst[3]; +      break; + +   case PIPE_BLENDFACTOR_DST_COLOR: +      factor[0] = dst[0]; +      factor[1] = dst[1]; +      factor[2] = dst[2]; +      break; + +   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +      tmp = spe_allocate_available_register(f); +      factor[0] = spe_allocate_available_register(f); +      factor[1] = factor[0]; +      factor[2] = factor[0]; + +      /* Alpha saturate means min(As, 1-Ad). 
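+       *
+       * The sequence below builds the constant 1.0f with il/cuflt, forms
+       * 1.0 - Ad with fs, and then chooses between As and 1.0 - Ad with an
+       * fcgt compare followed by a selb select.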
+       */ +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); +      spe_fs(f, tmp, tmp, dst[3]); +      spe_fcgt(f, factor[0], tmp, src[3]); +      spe_selb(f, factor[0], src[3], tmp, factor[0]); + +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_INV_CONST_COLOR: +      tmp = spe_allocate_available_register(f); +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); + +      for (i = 0; i < 3; i++) { +         factor[i] = spe_allocate_available_register(f); + +         spe_fs(f, factor[i], tmp, const_color[i]); +      } +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_CONST_COLOR: +      for (i = 0; i < 3; i++) { +         factor[i] = const_color[i]; +      } +      break; + +   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +      factor[0] = spe_allocate_available_register(f); +      factor[1] = factor[0]; +      factor[2] = factor[0]; + +      tmp = spe_allocate_available_register(f); +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); +      spe_fs(f, factor[0], tmp, const_color[3]); +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_CONST_ALPHA: +      factor[0] = const_color[3]; +      factor[1] = factor[0]; +      factor[2] = factor[0]; +      break; + +   case PIPE_BLENDFACTOR_ZERO: +      break; + +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      tmp = spe_allocate_available_register(f); + +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); + +      for (i = 0; i < 3; ++i) { +         if ((mask & (1U << i)) != 0) { +            factor[i] = spe_allocate_available_register(f); +            spe_fs(f, factor[i], tmp, src[i]); +         } +      } + +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +      tmp = spe_allocate_available_register(f); +      factor[0] = spe_allocate_available_register(f); +      factor[1] = factor[0]; +      factor[2] = factor[0]; + +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); +      spe_fs(f, factor[0], tmp, src[3]); + +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: +      tmp = spe_allocate_available_register(f); +      factor[0] = spe_allocate_available_register(f); +      factor[1] = factor[0]; +      factor[2] = factor[0]; + +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); +      spe_fs(f, factor[0], tmp, dst[3]); + +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      tmp = spe_allocate_available_register(f); + +      spe_il(f, tmp, 1); +      spe_cuflt(f, tmp, tmp, 0); + +      for (i = 0; i < 3; ++i) { +         if ((mask & (1U << i)) != 0) { +            factor[i] = spe_allocate_available_register(f); +            spe_fs(f, factor[i], tmp, dst[i]); +         } +      } + +      spe_release_register(f, tmp); +      break; + +   case PIPE_BLENDFACTOR_SRC1_COLOR: +   case PIPE_BLENDFACTOR_SRC1_ALPHA: +   case PIPE_BLENDFACTOR_INV_SRC1_COLOR: +   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: +   default: +      assert(0); +   } +} + + +static void +emit_blend_calculation(struct spe_function *f, +                       unsigned func, unsigned sF, unsigned dF, +                       int src, int src_factor, int dst, int dst_factor) +{ +   int tmp = spe_allocate_available_register(f); + +   switch (func) { +   case PIPE_BLEND_ADD: +      if (sF == PIPE_BLENDFACTOR_ONE) { +         if (dF == PIPE_BLENDFACTOR_ZERO) { +            /* Do nothing. 
*/
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_fa(f, src, src, dst);
+         }
+      } else if (sF == PIPE_BLENDFACTOR_ZERO) {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            spe_il(f, src, 0);
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_or(f, src, dst, dst);
+         } else {
+            spe_fm(f, src, dst, dst_factor);
+         }
+      } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+         spe_fm(f, src, src, src_factor);
+      } else {
+         spe_fm(f, tmp, dst, dst_factor);
+         spe_fma(f, src, src, src_factor, tmp);
+      }
+      break;
+
+   case PIPE_BLEND_SUBTRACT:
+      if (sF == PIPE_BLENDFACTOR_ONE) {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            /* Do nothing. */
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_fs(f, src, src, dst);
+         }
+      } else if (sF == PIPE_BLENDFACTOR_ZERO) {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            spe_il(f, src, 0);
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_il(f, tmp, 0);
+            spe_fs(f, src, tmp, dst);
+         } else {
+            spe_fm(f, src, dst, dst_factor);
+         }
+      } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+         spe_fm(f, src, src, src_factor);
+      } else {
+         spe_fm(f, tmp, dst, dst_factor);
+         spe_fms(f, src, src, src_factor, tmp);
+      }
+      break;
+
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      if (sF == PIPE_BLENDFACTOR_ONE) {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            spe_il(f, tmp, 0);
+            spe_fs(f, src, tmp, src);
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_fs(f, src, dst, src);
+         }
+      } else if (sF == PIPE_BLENDFACTOR_ZERO) {
+         if (dF == PIPE_BLENDFACTOR_ZERO) {
+            spe_il(f, src, 0);
+         } else if (dF == PIPE_BLENDFACTOR_ONE) {
+            spe_or(f, src, dst, dst);
+         } else {
+            spe_fm(f, src, dst, dst_factor);
+         }
+      } else if (dF == PIPE_BLENDFACTOR_ZERO) {
+         spe_fm(f, src, src, src_factor);
+      } else {
+         spe_fm(f, tmp, src, src_factor);
+         spe_fms(f, src, dst, dst_factor, tmp);
+      }
+      break;
+
+   case PIPE_BLEND_MIN:
+      spe_cgt(f, tmp, src, dst);
+      spe_selb(f, src, src, dst, tmp);
+      break;
+
+   case PIPE_BLEND_MAX:
+      spe_cgt(f, tmp, src, dst);
+      spe_selb(f, src, dst, src, tmp);
+      break;
+
+   default:
+      assert(0);
+   }
+
+   spe_release_register(f, tmp);
+}
+
+
+/**
+ * Generate code to perform alpha blending on the SPE
+ */
+void
+cell_generate_alpha_blend(struct cell_blend_state *cb)
+{
+   struct pipe_blend_state *const b = &cb->base;
+   struct spe_function *const f = &cb->code;
+
+   /* This code generates a maximum of 3 (source alpha factor)
+    * + 3 (destination alpha factor) + (3 * 6) (source color factor)
+    * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
+    * + 4 (fragment mask) + 1 (return) = 55 instructions.  Round up to 64 to
+    * make it a happy power-of-two. 
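+    *
+    * On entry, the fragment colors are expected in registers 3-6, the
+    * existing pixel colors in registers 7-10, and the constant blend color
+    * in registers 11-14 (see the spe_allocate_register() calls below).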
+    */ +   spe_init_func(f, SPE_INST_SIZE * 64); + + +   const int frag[4] = { +      spe_allocate_register(f, 3), +      spe_allocate_register(f, 4), +      spe_allocate_register(f, 5), +      spe_allocate_register(f, 6), +   }; +   const int pixel[4] = { +      spe_allocate_register(f, 7), +      spe_allocate_register(f, 8), +      spe_allocate_register(f, 9), +      spe_allocate_register(f, 10), +   }; +   const int const_color[4] = { +      spe_allocate_register(f, 11), +      spe_allocate_register(f, 12), +      spe_allocate_register(f, 13), +      spe_allocate_register(f, 14), +   }; +   unsigned func[4]; +   unsigned sF[4]; +   unsigned dF[4]; +   unsigned i; +   int src_factor[4]; +   int dst_factor[4]; + + +   /* Does the selected blend mode make use of the source / destination +    * color (RGB) blend factors? +    */ +   boolean need_color_factor = b->blend_enable +       && (b->rgb_func != PIPE_BLEND_MIN) +       && (b->rgb_func != PIPE_BLEND_MAX); + +   /* Does the selected blend mode make use of the source / destination +    * alpha blend factors? +    */ +   boolean need_alpha_factor = b->blend_enable +       && (b->alpha_func != PIPE_BLEND_MIN) +       && (b->alpha_func != PIPE_BLEND_MAX); + + +   if (b->blend_enable) { +      sF[0] = b->rgb_src_factor; +      sF[1] = sF[0]; +      sF[2] = sF[0]; +      switch (b->alpha_src_factor & 0x0f) { +      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +         sF[3] = PIPE_BLENDFACTOR_ONE; +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +      case PIPE_BLENDFACTOR_DST_COLOR: +      case PIPE_BLENDFACTOR_CONST_COLOR: +      case PIPE_BLENDFACTOR_SRC1_COLOR: +         sF[3] = b->alpha_src_factor + 1; +         break; +      default: +         sF[3] = b->alpha_src_factor; +      } + +      dF[0] = b->rgb_dst_factor; +      dF[1] = dF[0]; +      dF[2] = dF[0]; +      switch (b->alpha_dst_factor & 0x0f) { +      case PIPE_BLENDFACTOR_SRC_COLOR: +      case PIPE_BLENDFACTOR_DST_COLOR: +      case PIPE_BLENDFACTOR_CONST_COLOR: +      case PIPE_BLENDFACTOR_SRC1_COLOR: +         dF[3] = b->alpha_dst_factor + 1; +         break; +      default: +         dF[3] = b->alpha_dst_factor; +      } + +      func[0] = b->rgb_func; +      func[1] = func[0]; +      func[2] = func[0]; +      func[3] = b->alpha_func; +   } else { +      sF[0] = PIPE_BLENDFACTOR_ONE; +      sF[1] = PIPE_BLENDFACTOR_ONE; +      sF[2] = PIPE_BLENDFACTOR_ONE; +      sF[3] = PIPE_BLENDFACTOR_ONE; +      dF[0] = PIPE_BLENDFACTOR_ZERO; +      dF[1] = PIPE_BLENDFACTOR_ZERO; +      dF[2] = PIPE_BLENDFACTOR_ZERO; +      dF[3] = PIPE_BLENDFACTOR_ZERO; + +      func[0] = PIPE_BLEND_ADD; +      func[1] = PIPE_BLEND_ADD; +      func[2] = PIPE_BLEND_ADD; +      func[3] = PIPE_BLEND_ADD; +   } + + +   /* If alpha writing is enabled and the alpha blend mode requires use of +    * the alpha factor, calculate the alpha factor. +    */ +   if (((b->colormask & 8) != 0) && need_alpha_factor) { +      src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], +                                                    frag[3], pixel[3]); + +      /* If the alpha destination blend factor is the same as the alpha source +       * blend factor, re-use the previously calculated value. +       */ +      dst_factor[3] = (dF[3] == sF[3]) +          ? 
src_factor[3] +          : emit_alpha_factor_calculation(f, dF[3], const_color[3], +                                          frag[3], pixel[3]); +   } + + +   if (sF[0] == sF[3]) { +      src_factor[0] = src_factor[3]; +      src_factor[1] = src_factor[3]; +      src_factor[2] = src_factor[3]; +   } else if (sF[0] == dF[3]) { +      src_factor[0] = dst_factor[3]; +      src_factor[1] = dst_factor[3]; +      src_factor[2] = dst_factor[3]; +   } else if (need_color_factor) { +      emit_color_factor_calculation(f, +                                    b->rgb_src_factor, +                                    b->colormask, +                                    frag, pixel, const_color, src_factor); +   } + + +   if (dF[0] == sF[3]) { +      dst_factor[0] = src_factor[3]; +      dst_factor[1] = src_factor[3]; +      dst_factor[2] = src_factor[3]; +   } else if (dF[0] == dF[3]) { +      dst_factor[0] = dst_factor[3]; +      dst_factor[1] = dst_factor[3]; +      dst_factor[2] = dst_factor[3]; +   } else if (dF[0] == sF[0]) { +      dst_factor[0] = src_factor[0]; +      dst_factor[1] = src_factor[1]; +      dst_factor[2] = src_factor[2]; +   } else if (need_color_factor) { +      emit_color_factor_calculation(f, +                                    b->rgb_dst_factor, +                                    b->colormask, +                                    frag, pixel, const_color, dst_factor); +   } + + + +   for (i = 0; i < 4; ++i) { +      if ((b->colormask & (1U << i)) != 0) { +         emit_blend_calculation(f, +                                func[i], sF[i], dF[i], +                                frag[i], src_factor[i], +                                pixel[i], dst_factor[i]); +      } +   } + +   spe_bi(f, 0, 0, 0); + +#if 0 +   { +      const uint32_t *p = f->store; + +      printf("# %u instructions\n", f->csr - f->store); +      printf("# blend (%sabled)\n", +             (cb->base.blend_enable) ? "en" : "dis"); +      printf("#    RGB func / sf / df: %u %u %u\n", +             cb->base.rgb_func, +             cb->base.rgb_src_factor, +             cb->base.rgb_dst_factor); +      printf("#    ALP func / sf / df: %u %u %u\n", +             cb->base.alpha_func, +             cb->base.alpha_src_factor, +             cb->base.alpha_dst_factor); + +      printf("\t.text\n"); +      for (/* empty */; p < f->csr; p++) { +         printf("\t.long\t0x%04x\n", *p); +      } +      fflush(stdout); +   } +#endif +} + + +static int +PC_OFFSET(const struct spe_function *f, const void *d) +{ +   const intptr_t pc = (intptr_t) &f->store[f->num_inst]; +   const intptr_t ea = ~0x0f & (intptr_t) d; + +   return (ea - pc) >> 2; +} + + +/** + * Generate code to perform color conversion and logic op + * + * \bug + * The code generated by this function should also perform dithering. + * + * \bug + * The code generated by this function should also perform color-write + * masking. + * + * \bug + * Only two framebuffer formats are supported at this time. + */ +void +cell_generate_logic_op(struct spe_function *f, +                       const struct pipe_blend_state *blend, +                       struct pipe_surface *surf) +{ +   const unsigned logic_op = (blend->logicop_enable) +       ? blend->logicop_func : PIPE_LOGICOP_COPY; + +   /* This code generates a maximum of 37 instructions.  An additional 32 +    * bytes (equiv. to 8 instructions) are needed for data storage.  Round up +    * to 64 to make it a happy power-of-two. 
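+    *
+    * The 32-byte data area holds two quadwords: the "hi" transpose shuffle
+    * pattern and the color-conversion shuffle pattern (the "lo" transpose
+    * pattern is derived from the "hi" one by adding 0x0808 to each
+    * halfword).  It occupies the last eight words of the 64-word buffer and
+    * is loaded PC-relative with lqr() via PC_OFFSET().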
+    */
+   spe_init_func(f, SPE_INST_SIZE * 64);
+
+
+   /* Pixel colors in framebuffer format in AoS layout.
+    */
+   const int pixel[4] = {
+      spe_allocate_register(f, 3),
+      spe_allocate_register(f, 4),
+      spe_allocate_register(f, 5),
+      spe_allocate_register(f, 6),
+   };
+
+   /* Fragment colors stored as floats in SoA layout.
+    */
+   const int frag[4] = {
+      spe_allocate_register(f, 7),
+      spe_allocate_register(f, 8),
+      spe_allocate_register(f, 9),
+      spe_allocate_register(f, 10),
+   };
+
+   const int mask = spe_allocate_register(f, 11);
+
+
+   /* Short-circuit the noop and invert cases.
+    */
+   if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) {
+      spe_bi(f, 0, 0, 0);
+      return;
+   } else if (logic_op == PIPE_LOGICOP_INVERT) {
+      spe_nor(f, pixel[0], pixel[0], pixel[0]);
+      spe_nor(f, pixel[1], pixel[1], pixel[1]);
+      spe_nor(f, pixel[2], pixel[2], pixel[2]);
+      spe_nor(f, pixel[3], pixel[3], pixel[3]);
+      spe_bi(f, 0, 0, 0);
+      return;
+   }
+
+
+   const int tmp[4] = {
+      spe_allocate_available_register(f),
+      spe_allocate_available_register(f),
+      spe_allocate_available_register(f),
+      spe_allocate_available_register(f),
+   };
+
+   const int shuf_xpose_hi = spe_allocate_available_register(f);
+   const int shuf_xpose_lo = spe_allocate_available_register(f);
+   const int shuf_color = spe_allocate_available_register(f);
+
+
+   /* Pointer to the beginning of the function's private data area.
+    */
+   uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
+
+
+   /* Convert fragment colors to framebuffer format in AoS layout.
+    */
+   switch (surf->format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      data[0] = 0x00010203;
+      data[1] = 0x10111213;
+      data[2] = 0x04050607;
+      data[3] = 0x14151617;
+      data[4] = 0x0c000408;
+      data[5] = 0x80808080;
+      data[6] = 0x80808080;
+      data[7] = 0x80808080;
+      break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      data[0] = 0x03020100;
+      data[1] = 0x13121110;
+      data[2] = 0x07060504;
+      data[3] = 0x17161514;
+      data[4] = 0x0804000c;
+      data[5] = 0x80808080;
+      data[6] = 0x80808080;
+      data[7] = 0x80808080;
+      break;
+   default:
+      fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
+      ASSERT(0);
+   }
+
+   spe_ilh(f, tmp[0], 0x0808);
+   spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
+   spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
+   spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
+
+   spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
+   spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
+   spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
+   spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
+
+   spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
+   spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
+   spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
+   spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
+
+   spe_cfltu(f, frag[0], frag[0], 32);
+   spe_cfltu(f, frag[1], frag[1], 32);
+   spe_cfltu(f, frag[2], frag[2], 32);
+   spe_cfltu(f, frag[3], frag[3], 32);
+
+   spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
+   spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
+   spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
+   spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
+
+
+   /* If logic op is enabled, perform the requested logical operation on the
+    * converted 
fragment colors and the pixel colors. +    */ +   switch (logic_op) { +   case PIPE_LOGICOP_CLEAR: +      spe_il(f, frag[0], 0); +      spe_il(f, frag[1], 0); +      spe_il(f, frag[2], 0); +      spe_il(f, frag[3], 0); +      break; +   case PIPE_LOGICOP_NOR: +      spe_nor(f, frag[0], frag[0], pixel[0]); +      spe_nor(f, frag[1], frag[1], pixel[1]); +      spe_nor(f, frag[2], frag[2], pixel[2]); +      spe_nor(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_AND_INVERTED: +      spe_andc(f, frag[0], pixel[0], frag[0]); +      spe_andc(f, frag[1], pixel[1], frag[1]); +      spe_andc(f, frag[2], pixel[2], frag[2]); +      spe_andc(f, frag[3], pixel[3], frag[3]); +      break; +   case PIPE_LOGICOP_COPY_INVERTED: +      spe_nor(f, frag[0], frag[0], frag[0]); +      spe_nor(f, frag[1], frag[1], frag[1]); +      spe_nor(f, frag[2], frag[2], frag[2]); +      spe_nor(f, frag[3], frag[3], frag[3]); +      break; +   case PIPE_LOGICOP_AND_REVERSE: +      spe_andc(f, frag[0], frag[0], pixel[0]); +      spe_andc(f, frag[1], frag[1], pixel[1]); +      spe_andc(f, frag[2], frag[2], pixel[2]); +      spe_andc(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_XOR: +      spe_xor(f, frag[0], frag[0], pixel[0]); +      spe_xor(f, frag[1], frag[1], pixel[1]); +      spe_xor(f, frag[2], frag[2], pixel[2]); +      spe_xor(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_NAND: +      spe_nand(f, frag[0], frag[0], pixel[0]); +      spe_nand(f, frag[1], frag[1], pixel[1]); +      spe_nand(f, frag[2], frag[2], pixel[2]); +      spe_nand(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_AND: +      spe_and(f, frag[0], frag[0], pixel[0]); +      spe_and(f, frag[1], frag[1], pixel[1]); +      spe_and(f, frag[2], frag[2], pixel[2]); +      spe_and(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_EQUIV: +      spe_eqv(f, frag[0], frag[0], pixel[0]); +      spe_eqv(f, frag[1], frag[1], pixel[1]); +      spe_eqv(f, frag[2], frag[2], pixel[2]); +      spe_eqv(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_OR_INVERTED: +      spe_orc(f, frag[0], pixel[0], frag[0]); +      spe_orc(f, frag[1], pixel[1], frag[1]); +      spe_orc(f, frag[2], pixel[2], frag[2]); +      spe_orc(f, frag[3], pixel[3], frag[3]); +      break; +   case PIPE_LOGICOP_COPY: +      break; +   case PIPE_LOGICOP_OR_REVERSE: +      spe_orc(f, frag[0], frag[0], pixel[0]); +      spe_orc(f, frag[1], frag[1], pixel[1]); +      spe_orc(f, frag[2], frag[2], pixel[2]); +      spe_orc(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_OR: +      spe_or(f, frag[0], frag[0], pixel[0]); +      spe_or(f, frag[1], frag[1], pixel[1]); +      spe_or(f, frag[2], frag[2], pixel[2]); +      spe_or(f, frag[3], frag[3], pixel[3]); +      break; +   case PIPE_LOGICOP_SET: +      spe_il(f, frag[0], ~0); +      spe_il(f, frag[1], ~0); +      spe_il(f, frag[2], ~0); +      spe_il(f, frag[3], ~0); +      break; + +   /* These two cases are short-circuited above. +    */ +   case PIPE_LOGICOP_INVERT: +   case PIPE_LOGICOP_NOOP: +   default: +      assert(0); +   } + + +   /* Apply fragment mask. 
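+    *
+    * The ilh/shufb pairs below replicate each fragment's all-ones or
+    * all-zeros mask word across a full quadword so that selb can write the
+    * new color only where the corresponding fragment is live.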
+    */ +   spe_ilh(f, tmp[0], 0x0000); +   spe_ilh(f, tmp[1], 0x0404); +   spe_ilh(f, tmp[2], 0x0808); +   spe_ilh(f, tmp[3], 0x0c0c); + +   spe_shufb(f, tmp[0], mask, mask, tmp[0]); +   spe_shufb(f, tmp[1], mask, mask, tmp[1]); +   spe_shufb(f, tmp[2], mask, mask, tmp[2]); +   spe_shufb(f, tmp[3], mask, mask, tmp[3]); + +   spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); +   spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); +   spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); +   spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); + +   spe_bi(f, 0, 0, 0); + +#if 0 +   { +      const uint32_t *p = f->store; +      unsigned i; + +      printf("# %u instructions\n", f->csr - f->store); + +      printf("\t.text\n"); +      for (i = 0; i < 64; i++) { +         printf("\t.long\t0x%04x\n", p[i]); +      } +      fflush(stdout); +   } +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h new file mode 100644 index 0000000000..a8267a5133 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -0,0 +1,39 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef CELL_STATE_PER_FRAGMENT_H +#define CELL_STATE_PER_FRAGMENT_H + +extern void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); + +extern void +cell_generate_alpha_blend(struct cell_blend_state *cb); + +extern void +cell_generate_logic_op(struct spe_function *f, +                       const struct pipe_blend_state *blend, +                       struct pipe_surface *surf); + +#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c new file mode 100644 index 0000000000..bf517ea563 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -0,0 +1,219 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_parse.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_gen_fp.h" + + +/** cast wrapper */ +static INLINE struct cell_fragment_shader_state * +cell_fragment_shader_state(void *shader) +{ +   return (struct cell_fragment_shader_state *) shader; +} + + +/** cast wrapper */ +static INLINE struct cell_vertex_shader_state * +cell_vertex_shader_state(void *shader) +{ +   return (struct cell_vertex_shader_state *) shader; +} + + +/** + * Create fragment shader state. + * Called via pipe->create_fs_state() + */ +static void * +cell_create_fs_state(struct pipe_context *pipe, +                     const struct pipe_shader_state *templ) +{ +   struct cell_context *cell = cell_context(pipe); +   struct cell_fragment_shader_state *cfs; + +   cfs = CALLOC_STRUCT(cell_fragment_shader_state); +   if (!cfs) +      return NULL; + +   cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); +   if (!cfs->shader.tokens) { +      FREE(cfs); +      return NULL; +   } + +   tgsi_scan_shader(templ->tokens, &cfs->info); + +   cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); + +   return cfs; +} + + +/** + * Called via pipe->bind_fs_state() + */ +static void +cell_bind_fs_state(struct pipe_context *pipe, void *fs) +{ +   struct cell_context *cell = cell_context(pipe); + +   cell->fs = cell_fragment_shader_state(fs); + +   cell->dirty |= CELL_NEW_FS; +} + + +/** + * Called via pipe->delete_fs_state() + */ +static void +cell_delete_fs_state(struct pipe_context *pipe, void *fs) +{ +   struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); + +   spe_release_func(&cfs->code); + +   FREE((void *) cfs->shader.tokens); +   FREE(cfs); +} + + +/** + * Create vertex shader state. 
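+ * Vertex shaders are handed to the draw module (draw_create_vertex_shader())
+ * rather than being compiled to SPE code like the fragment shaders above.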
+ * Called via pipe->create_vs_state() + */ +static void * +cell_create_vs_state(struct pipe_context *pipe, +                     const struct pipe_shader_state *templ) +{ +   struct cell_context *cell = cell_context(pipe); +   struct cell_vertex_shader_state *cvs; + +   cvs = CALLOC_STRUCT(cell_vertex_shader_state); +   if (!cvs) +      return NULL; + +   cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); +   if (!cvs->shader.tokens) { +      FREE(cvs); +      return NULL; +   } + +   tgsi_scan_shader(templ->tokens, &cvs->info); + +   cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); +   if (cvs->draw_data == NULL) { +      FREE( (void *) cvs->shader.tokens ); +      FREE( cvs ); +      return NULL; +   } + +   return cvs; +} + + +/** + * Called via pipe->bind_vs_state() + */ +static void +cell_bind_vs_state(struct pipe_context *pipe, void *vs) +{ +   struct cell_context *cell = cell_context(pipe); + +   cell->vs = cell_vertex_shader_state(vs); + +   draw_bind_vertex_shader(cell->draw, +                           (cell->vs ? cell->vs->draw_data : NULL)); + +   cell->dirty |= CELL_NEW_VS; +} + + +/** + * Called via pipe->delete_vs_state() + */ +static void +cell_delete_vs_state(struct pipe_context *pipe, void *vs) +{ +   struct cell_context *cell = cell_context(pipe); +   struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); + +   draw_delete_vertex_shader(cell->draw, cvs->draw_data); +   FREE( (void *) cvs->shader.tokens ); +   FREE( cvs ); +} + + +/** + * Called via pipe->set_constant_buffer() + */ +static void +cell_set_constant_buffer(struct pipe_context *pipe, +                         uint shader, uint index, +                         const struct pipe_constant_buffer *buf) +{ +   struct cell_context *cell = cell_context(pipe); + +   assert(shader < PIPE_SHADER_TYPES); +   assert(index == 0); + +   draw_flush(cell->draw); + +   /* note: reference counting */ +   pipe_buffer_reference(pipe->screen, +                         &cell->constants[shader].buffer, +                         buf->buffer); + +   if (shader == PIPE_SHADER_VERTEX) +      cell->dirty |= CELL_NEW_VS_CONSTANTS; +   else if (shader == PIPE_SHADER_FRAGMENT) +      cell->dirty |= CELL_NEW_FS_CONSTANTS; +} + + +void +cell_init_shader_functions(struct cell_context *cell) +{ +   cell->pipe.create_fs_state = cell_create_fs_state; +   cell->pipe.bind_fs_state   = cell_bind_fs_state; +   cell->pipe.delete_fs_state = cell_delete_fs_state; + +   cell->pipe.create_vs_state = cell_create_vs_state; +   cell->pipe.bind_vs_state   = cell_bind_vs_state; +   cell->pipe.delete_vs_state = cell_delete_vs_state; + +   cell->pipe.set_constant_buffer = cell_set_constant_buffer; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c new file mode 100644 index 0000000000..fbe55c8472 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -0,0 +1,79 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "cell_context.h" +#include "cell_state.h" + +#include "draw/draw_context.h" + + +static void +cell_set_vertex_elements(struct pipe_context *pipe, +                         unsigned count, +                         const struct pipe_vertex_element *elements) +{ +   struct cell_context *cell = cell_context(pipe); + +   assert(count <= PIPE_MAX_ATTRIBS); + +   memcpy(cell->vertex_element, elements, count * sizeof(elements[0])); +   cell->num_vertex_elements = count; + +   cell->dirty |= CELL_NEW_VERTEX; + +   draw_set_vertex_elements(cell->draw, count, elements); +} + + +static void +cell_set_vertex_buffers(struct pipe_context *pipe, +                        unsigned count, +                        const struct pipe_vertex_buffer *buffers) +{ +   struct cell_context *cell = cell_context(pipe); + +   assert(count <= PIPE_MAX_ATTRIBS); + +   memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); +   cell->num_vertex_buffers = count; + +   cell->dirty |= CELL_NEW_VERTEX; + +   draw_set_vertex_buffers(cell->draw, count, buffers); +} + + +void +cell_init_vertex_functions(struct cell_context *cell) +{ +   cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; +   cell->pipe.set_vertex_elements = cell_set_vertex_elements; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c new file mode 100644 index 0000000000..c9203fee08 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -0,0 +1,38 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "util/u_rect.h" +#include "cell_context.h" +#include "cell_surface.h" + + +void +cell_init_surface_functions(struct cell_context *cell) +{ +   cell->pipe.surface_copy = util_surface_copy; +   cell->pipe.surface_fill = util_surface_fill; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h new file mode 100644 index 0000000000..9e58f32944 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef CELL_SURFACE_H +#define CELL_SURFACE_H + + +struct cell_context; + + +extern void +cell_init_surface_functions(struct cell_context *cell); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c new file mode 100644 index 0000000000..9ba995ab7d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -0,0 +1,531 @@ +/************************************************************************** + *  + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  *   Michel Dänzer <michel@tungstengraphics.com> +  *   Brian Paul +  */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + + +static unsigned +minify(unsigned d) +{ +   return MAX2(1, d>>1); +} + + +static void +cell_texture_layout(struct cell_texture *ct) +{ +   struct pipe_texture *pt = &ct->base; +   unsigned level; +   unsigned width = pt->width[0]; +   unsigned height = pt->height[0]; +   unsigned depth = pt->depth[0]; + +   ct->buffer_size = 0; + +   for ( level = 0 ; level <= pt->last_level ; level++ ) { +      unsigned size; +      unsigned w_tile, h_tile; + +      assert(level < CELL_MAX_TEXTURE_LEVELS); + +      /* width, height, rounded up to tile size */ +      w_tile = align(width, TILE_SIZE); +      h_tile = align(height, TILE_SIZE); + +      pt->width[level] = width; +      pt->height[level] = height; +      pt->depth[level] = depth; +      pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);   +      pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);   + +      ct->stride[level] = pt->nblocksx[level] * pt->block.size; + +      ct->level_offset[level] = ct->buffer_size; + +      size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; +      if (pt->target == PIPE_TEXTURE_CUBE) +         size *= 6; +      else +         size *= depth; + +      ct->buffer_size += size; + +      width  = minify(width); +      height = minify(height); +      depth = minify(depth); +   } +} + + +static struct pipe_texture * +cell_texture_create(struct pipe_screen *screen, +                    const struct pipe_texture *templat) +{ +   struct pipe_winsys *ws = screen->winsys; +   struct cell_texture *ct = CALLOC_STRUCT(cell_texture); +   if (!ct) +      return NULL; + +   ct->base = *templat; +   ct->base.refcount = 1; +   ct->base.screen = screen; + +   cell_texture_layout(ct); + +   ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, +                                   ct->buffer_size); + +   if (!ct->buffer) { +      FREE(ct); +      return NULL; +   } + +   return &ct->base; +} + + +static void +cell_texture_release(struct pipe_screen *screen, +                     struct pipe_texture **pt) +{ +   if (!*pt) +      return; + +   /* +   DBG("%s %p refcount will be %d\n", +       __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); +   */ +   if (--(*pt)->refcount <= 0) { +      /* Delete this texture now. +       * But note that the underlying pipe_buffer may linger... +       */ +      struct cell_texture *ct = cell_texture(*pt); +      uint i; + +      /* +      DBG("%s deleting %p\n", __FUNCTION__, (void *) ct); +      */ + +      pipe_buffer_reference(screen, &ct->buffer, NULL); + +      for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { +         /* Unreference the tiled image buffer. +          * It may not actually be deleted until a fence is hit. +          */ +         if (ct->tiled_buffer[i]) { +            ct->tiled_mapped[i] = NULL; +            pipe_buffer_reference(screen, &ct->tiled_buffer[i], NULL); +         } +      } + +      FREE(ct); +   } +   *pt = NULL; +} + + + +/** + * Convert image from linear layout to tiled layout.  4-byte pixels. 
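+ *
+ * For a pixel at (x, y) in the linear source image, the destination index
+ * computed by the loops below is
+ *
+ *    tile  = (y / tile_size) * w_t + (x / tile_size)
+ *    index = tile * tile_size * tile_size
+ *            + (y % tile_size) * tile_size + (x % tile_size)
+ *
+ * where w_t is the image width in whole tiles.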
+ */ +static void +twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, +                   uint src_stride, const uint *src) +{ +   const uint tile_size2 = tile_size * tile_size; +   const uint h_t = (h + tile_size - 1) / tile_size; +   const uint w_t = (w + tile_size - 1) / tile_size; + +   uint it, jt;  /* tile counters */ +   uint i, j;    /* intra-tile counters */ + +   src_stride /= 4; /* convert from bytes to pixels */ + +   /* loop over dest tiles */ +   for (it = 0; it < h_t; it++) { +      for (jt = 0; jt < w_t; jt++) { +         /* start of dest tile: */ +         uint *tdst = dst + (it * w_t + jt) * tile_size2; + +         /* compute size of this tile (may be smaller than tile_size) */ +         /* XXX note: a compiler bug was found here. That's why the code +          * looks as it does. +          */ +         uint tile_width = w - jt * tile_size; +         tile_width = MIN2(tile_width, tile_size); +         uint tile_height = h - it * tile_size; +         tile_height = MIN2(tile_height, tile_size); + +         /* loop over texels in the tile */ +         for (i = 0; i < tile_height; i++) { +            for (j = 0; j < tile_width; j++) { +               const uint srci = it * tile_size + i; +               const uint srcj = jt * tile_size + j; +               ASSERT(srci < h); +               ASSERT(srcj < w); +               tdst[i * tile_size + j] = src[srci * src_stride + srcj]; +            } +         } +      } +   } +} + + +/** + * For Cell.  Basically, rearrange the pixels/quads from this layout: + *  +--+--+--+--+ + *  |p0|p1|p2|p3|.... + *  +--+--+--+--+ + * + * to this layout: + *  +--+--+ + *  |p0|p1|.... + *  +--+--+ + *  |p2|p3| + *  +--+--+ + */ +static void +twiddle_tile(const uint *tileIn, uint *tileOut) +{ +   int y, x; + +   for (y = 0; y < TILE_SIZE; y+=2) { +      for (x = 0; x < TILE_SIZE; x+=2) { +         int k = 4 * (y/2 * TILE_SIZE/2 + x/2); +         tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; +         tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; +         tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; +         tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; +      } +   } +} + + +/** + * Convert image from tiled layout to linear layout.  4-byte pixels. + */ +static void +untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, +                     uint dst_stride, const uint *src) +{ +   const uint tile_size2 = tile_size * tile_size; +   const uint h_t = (h + tile_size - 1) / tile_size; +   const uint w_t = (w + tile_size - 1) / tile_size; +   uint *tile_buf; +   uint it, jt;  /* tile counters */ +   uint i, j;    /* intra-tile counters */ + +   dst_stride /= 4; /* convert from bytes to pixels */ + +   tile_buf = align_malloc(tile_size * tile_size * 4, 16); +    +   /* loop over src tiles */ +   for (it = 0; it < h_t; it++) { +      for (jt = 0; jt < w_t; jt++) { +         /* start of src tile: */ +         const uint *tsrc = src + (it * w_t + jt) * tile_size2; +          +         twiddle_tile(tsrc, tile_buf); +         tsrc = tile_buf; + +         /* compute size of this tile (may be smaller than tile_size) */ +         /* XXX note: a compiler bug was found here. That's why the code +          * looks as it does. 
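+          * (The clamps are kept in separate statements below instead of
+          * being folded into the initializers.)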
+          */ +         uint tile_width = w - jt * tile_size; +         tile_width = MIN2(tile_width, tile_size); +         uint tile_height = h - it * tile_size; +         tile_height = MIN2(tile_height, tile_size); + +         /* loop over texels in the tile */ +         for (i = 0; i < tile_height; i++) { +            for (j = 0; j < tile_width; j++) { +               uint dsti = it * tile_size + i; +               uint dstj = jt * tile_size + j; +               ASSERT(dsti < h); +               ASSERT(dstj < w); +               dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j]; +            } +         } +      } +   } + +   align_free(tile_buf); +} + + +/** + * Convert linear texture image data to tiled format for SPU usage. + */ +static void +cell_twiddle_texture(struct pipe_screen *screen, +                     struct pipe_surface *surface) +{ +   struct cell_texture *ct = cell_texture(surface->texture); +   const uint level = surface->level; +   const uint texWidth = ct->base.width[level]; +   const uint texHeight = ct->base.height[level]; +   const uint bufWidth = align(texWidth, TILE_SIZE); +   const uint bufHeight = align(texHeight, TILE_SIZE); +   const void *map = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_READ); +   const uint *src = (const uint *) map; + +   switch (ct->base.format) { +   case PIPE_FORMAT_A8R8G8B8_UNORM: +   case PIPE_FORMAT_B8G8R8A8_UNORM: +   case PIPE_FORMAT_S8Z24_UNORM: +      { +         int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; +         int offset = bufWidth * bufHeight * 4 * surface->face; +         uint *dst; + +         if (!ct->tiled_buffer[level]) { +            /* allocate buffer for tiled data now */ +            struct pipe_winsys *ws = screen->winsys; +            uint bytes = bufWidth * bufHeight * 4 * numFaces; +            ct->tiled_buffer[level] = +               ws->buffer_create(ws, 16, PIPE_BUFFER_USAGE_PIXEL, bytes); +            /* and map it */ +            ct->tiled_mapped[level] = +               ws->buffer_map(ws, ct->tiled_buffer[level], +                              PIPE_BUFFER_USAGE_GPU_READ); +         } +         dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); + +         twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, +                            surface->stride, src); +      } +      break; +   default: +      printf("Cell: twiddle unsupported texture format %s\n", +             pf_name(ct->base.format)); +   } + +   screen->surface_unmap(screen, surface); +} + + +/** + * Convert SPU tiled texture image data to linear format for app usage. + */ +static void +cell_untwiddle_texture(struct pipe_screen *screen, +                     struct pipe_surface *surface) +{ +   struct cell_texture *ct = cell_texture(surface->texture); +   const uint level = surface->level; +   const uint texWidth = ct->base.width[level]; +   const uint texHeight = ct->base.height[level]; +   const void *map = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_READ); +   const uint *src = (const uint *) ((const ubyte *) map + surface->offset); + +   switch (ct->base.format) { +   case PIPE_FORMAT_A8R8G8B8_UNORM: +   case PIPE_FORMAT_B8G8R8A8_UNORM: +   case PIPE_FORMAT_S8Z24_UNORM: +      { +         int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 
6 : 1; +         int offset = surface->stride * texHeight * 4 * surface->face; +         uint *dst; + +         if (!ct->untiled_data[level]) { +            ct->untiled_data[level] = +               align_malloc(surface->stride * texHeight * 4 * numFaces, 16); +         } + +         dst = (uint *) ((ubyte *) ct->untiled_data[level] + offset); + +         untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, +                              surface->stride, src); +      } +      break; +   default: +      { +         ct->untiled_data[level] = NULL; +         printf("Cell: untwiddle unsupported texture format %s\n", +                pf_name(ct->base.format)); +      } +   } + +   screen->surface_unmap(screen, surface); +} + + +static struct pipe_surface * +cell_get_tex_surface(struct pipe_screen *screen, +                     struct pipe_texture *pt, +                     unsigned face, unsigned level, unsigned zslice, +                     unsigned usage) +{ +   struct cell_texture *ct = cell_texture(pt); +   struct pipe_surface *ps; + +   ps = CALLOC_STRUCT(pipe_surface); +   if (ps) { +      ps->refcount = 1; +      pipe_texture_reference(&ps->texture, pt); +      ps->format = pt->format; +      ps->block = pt->block; +      ps->width = pt->width[level]; +      ps->height = pt->height[level]; +      ps->nblocksx = pt->nblocksx[level]; +      ps->nblocksy = pt->nblocksy[level]; +      ps->stride = ct->stride[level]; +      ps->offset = ct->level_offset[level]; +      ps->usage = usage; + +      /* XXX may need to override usage flags (see sp_texture.c) */ + +      pipe_texture_reference(&ps->texture, pt);  +      ps->face = face; +      ps->level = level; +      ps->zslice = zslice; + +      if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { +         ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? 
face : zslice) * +            ps->nblocksy * +            ps->stride; +      } +      else { +         assert(face == 0); +         assert(zslice == 0); +      } + +      if (ps->usage & PIPE_BUFFER_USAGE_CPU_READ) { +         /* convert from tiled to linear layout */ +         cell_untwiddle_texture(screen, ps); +      } +   } +   return ps; +} + + +static void  +cell_tex_surface_release(struct pipe_screen *screen,  +                         struct pipe_surface **s) +{ +   struct cell_texture *ct = cell_texture((*s)->texture); +   const uint level = (*s)->level; +   struct pipe_surface *surf = *s; + +   if ((surf->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level])) +   { +      align_free(ct->untiled_data[level]); +      ct->untiled_data[level] = NULL; +   } + +   if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) && +       (surf->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) { +      /* convert from linear to tiled layout */ +      cell_twiddle_texture(screen, surf); +   } + +   /* XXX if done rendering to teximage, re-tile */ + +   if (--surf->refcount == 0) { +      pipe_texture_reference(&surf->texture, NULL); +      FREE(surf); +   } +   *s = NULL; +} + + +static void * +cell_surface_map(struct pipe_screen *screen, +                 struct pipe_surface *surface, +                 unsigned flags) +{ +   ubyte *map; +   struct cell_texture *ct = cell_texture(surface->texture); +   const uint level = surface->level; + +   assert(ct); + +#if 0 +   if (flags & ~surface->usage) { +      assert(0); +      return NULL; +   } +#endif + +   map = pipe_buffer_map( screen, ct->buffer, flags ); +   if (map == NULL) { +      return NULL; +   } +   else { +      if ((surface->usage & PIPE_BUFFER_USAGE_CPU_READ) && +          (ct->untiled_data[level])) { +         return (void *) ((ubyte *) ct->untiled_data[level] + surface->offset); +      } +      else { +         return (void *) (map + surface->offset); +      } +   } +} + + +static void +cell_surface_unmap(struct pipe_screen *screen, +                   struct pipe_surface *surface) +{ +   struct cell_texture *ct = cell_texture(surface->texture); + +   assert(ct); + +   pipe_buffer_unmap( screen, ct->buffer ); +} + + + +void +cell_init_screen_texture_funcs(struct pipe_screen *screen) +{ +   screen->texture_create = cell_texture_create; +   screen->texture_release = cell_texture_release; + +   screen->get_tex_surface = cell_get_tex_surface; +   screen->tex_surface_release = cell_tex_surface_release; + +   screen->surface_map = cell_surface_map; +   screen->surface_unmap = cell_surface_unmap; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h new file mode 100644 index 0000000000..7018b0c9bf --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -0,0 +1,72 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef CELL_TEXTURE_H +#define CELL_TEXTURE_H + + +struct cell_context; +struct pipe_texture; + + +/** + * Subclass of pipe_texture + */ +struct cell_texture +{ +   struct pipe_texture base; + +   unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; +   unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; + +   /* The data is held here: +    */ +   struct pipe_buffer *buffer; +   unsigned long buffer_size; + +   /** Texture data in tiled layout is held here */ +   struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS]; +   /** Mapped, tiled texture data */ +   void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS]; +   void *untiled_data[CELL_MAX_TEXTURE_LEVELS]; +}; + + +/** cast wrapper */ +static INLINE struct cell_texture * +cell_texture(struct pipe_texture *pt) +{ +   return (struct cell_texture *) pt; +} + + + +extern void +cell_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c new file mode 100644 index 0000000000..ab54e79689 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -0,0 +1,309 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * Vertex buffer code.  The draw module transforms vertices to window + * coords, etc. and emits the vertices into buffer supplied by this module. + * When a vertex buffer is full, or we flush, we'll send the vertex data + * to the SPUs. + * + * Authors + *  Brian Paul + */ + + +#include "cell_batch.h" +#include "cell_context.h" +#include "cell_fence.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_vbuf.h" +#include "draw/draw_vbuf.h" +#include "util/u_memory.h" + + +/** Allow vertex data to be inlined after RENDER command */ +#define ALLOW_INLINE_VERTS 1 + + +/** + * Subclass of vbuf_render because we need a cell_context pointer in + * a few places. + */ +struct cell_vbuf_render +{ +   struct vbuf_render base; +   struct cell_context *cell; +   uint prim;            /**< PIPE_PRIM_x */ +   uint vertex_size;     /**< in bytes */ +   void *vertex_buffer;  /**< just for debug, really */ +   uint vertex_buf;      /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +/** cast wrapper */ +static struct cell_vbuf_render * +cell_vbuf_render(struct vbuf_render *vbr) +{ +   return (struct cell_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +cell_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ +   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); +   return &cvbr->cell->vertex_info; +} + + +static void * +cell_vbuf_allocate_vertices(struct vbuf_render *vbr, +                            ushort vertex_size, ushort nr_vertices) +{ +   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); +   /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ + +   assert(cvbr->vertex_buf == ~0); +   cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); +   cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; +   cvbr->vertex_size = vertex_size; +   return cvbr->vertex_buffer; +} + + +static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,  +                           unsigned vertex_size, unsigned vertices_used) +{ +   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); +   struct cell_context *cell = cvbr->cell; + +   /* +   printf("%s vertex_buf = %u  count = %u\n", +          __FUNCTION__, cvbr->vertex_buf, vertices_used); +   */ + +   /* Make sure texture buffers aren't released until we're done rendering +    * with them. 
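+    * (cell_add_fenced_textures() presumably records the textures referenced
+    * by this batch against the batch buffer's fence, so the PPU side keeps
+    * them alive until the SPUs signal that fence; see cell_fence.h.)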
+    */ +   cell_add_fenced_textures(cell); + +   /* Tell SPUs they can release the vert buf */ +   if (cvbr->vertex_buf != ~0U) { +      STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0); +      struct cell_command_release_verts *release +         = (struct cell_command_release_verts *) +         cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts)); +      release->opcode[0] = CELL_CMD_RELEASE_VERTS; +      release->vertex_buf = cvbr->vertex_buf; +   } + +   cvbr->vertex_buf = ~0; +   cell_flush_int(cell, 0x0); + +   assert(vertices == cvbr->vertex_buffer); +   cvbr->vertex_buffer = NULL; +} + + + +static boolean +cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ +   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); +   cvbr->prim = prim; +   /*printf("cell_set_prim %u\n", prim);*/ +   return TRUE; +} + + +static void +cell_vbuf_draw(struct vbuf_render *vbr, +	       const ushort *indices, +               uint nr_indices) +{ +   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); +   struct cell_context *cell = cvbr->cell; +   float xmin, ymin, xmax, ymax; +   uint i; +   uint nr_vertices = 0, min_index = ~0; +   const void *vertices = cvbr->vertex_buffer; +   const uint vertex_size = cvbr->vertex_size; + +   for (i = 0; i < nr_indices; i++) { +      if (indices[i] > nr_vertices) +         nr_vertices = indices[i]; +      if (indices[i] < min_index) +         min_index = indices[i]; +   } +   nr_vertices++; + +#if 0 +   /*if (min_index > 0)*/ +      printf("%s min_index = %u\n", __FUNCTION__, min_index); +#endif + +#if 0 +   printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", +          nr_indices, nr_vertices); +   printf("  "); +   for (i = 0; i < nr_indices; i += 3) { +      printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); +   } +   printf("\n"); +#elif 0 +   printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u  indexes = [%u %u %u ...]\n", +          nr_indices, nr_vertices, +          indices[0], indices[1], indices[2]); +   printf("ind space = %u, vert space = %u, space = %u\n", +          nr_indices * 2, +          nr_vertices * 4 * cell->vertex_info.size, +          cell_batch_free_space(cell)); +#endif + +   /* compute x/y bounding box */ +   xmin = ymin = 1e50; +   xmax = ymax = -1e50; +   for (i = min_index; i < nr_vertices; i++) { +      const float *v = (float *) ((ubyte *) vertices + i * vertex_size); +      if (v[0] < xmin) +         xmin = v[0]; +      if (v[0] > xmax) +         xmax = v[0]; +      if (v[1] < ymin) +         ymin = v[1]; +      if (v[1] > ymax) +         ymax = v[1]; +   } +#if 0 +   printf("PPU Bounds %g, %g .. 
%g, %g\n", xmin, ymin, xmax, ymax); +   fflush(stdout); +#endif + +   if (cvbr->prim != PIPE_PRIM_TRIANGLES) +      return; /* only render tris for now */ + +   /* build/insert batch RENDER command */ +   { +      const uint index_bytes = ROUNDUP16(nr_indices * 2); +      const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size); +      STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0); +      const uint batch_size = sizeof(struct cell_command_render) + index_bytes; + +      struct cell_command_render *render +         = (struct cell_command_render *) +         cell_batch_alloc16(cell, batch_size); + +      render->opcode[0] = CELL_CMD_RENDER; +      render->prim_type = cvbr->prim; + +      render->num_indexes = nr_indices; +      render->min_index = min_index; + +      /* append indices after render command */ +      memcpy(render + 1, indices, nr_indices * 2); + +      /* if there's room, append vertices after the indices, else leave +       * vertices in the original/separate buffer. +       */ +      render->vertex_size = 4 * cell->vertex_info.size; +      render->num_verts = nr_vertices; +      if (ALLOW_INLINE_VERTS && +          min_index == 0 && +          vertex_bytes + 16 <= cell_batch_free_space(cell)) { +         /* vertex data inlined, after indices, at 16-byte boundary */ +         void *dst = cell_batch_alloc16(cell, vertex_bytes); +         memcpy(dst, vertices, vertex_bytes); +         render->inline_verts = TRUE; +         render->vertex_buf = ~0; +      } +      else { +         /* vertex data in separate buffer */ +         render->inline_verts = FALSE; +         ASSERT(cvbr->vertex_buf >= 0); +         render->vertex_buf = cvbr->vertex_buf; +      } + +      render->xmin = xmin; +      render->ymin = ymin; +      render->xmax = xmax; +      render->ymax = ymax; +   } + +#if 0 +   /* helpful for debug */ +   cell_flush_int(cell, CELL_FLUSH_WAIT); +#endif +} + + +static void +cell_vbuf_destroy(struct vbuf_render *vbr) +{ +   struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); +   cvbr->cell->vbuf_render = NULL; +   FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +cell_init_vbuf(struct cell_context *cell) +{ +   assert(cell->draw); + +   cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); + +   /* The max number of indexes is what can fix into a batch buffer, +    * minus the render and release-verts commands. 
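+    * (Indexes are 16-bit ushorts, hence the division by sizeof(ushort);
+    * with the 4 KB batch buffers this works out to roughly two thousand
+    * indexes per buffer, the two command structs being small by comparison.)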
+    */ +   cell->vbuf_render->base.max_indices +      = (CELL_BUFFER_SIZE +         - sizeof(struct cell_command_render) +         - sizeof(struct cell_command_release_verts)) +      / sizeof(ushort); +   cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; + +   cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; +   cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; +   cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; +   cell->vbuf_render->base.draw = cell_vbuf_draw; +   cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; +   cell->vbuf_render->base.destroy = cell_vbuf_destroy; + +   cell->vbuf_render->cell = cell; +#if 1 +   cell->vbuf_render->vertex_buf = ~0; +#endif + +   cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); +} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h new file mode 100644 index 0000000000..d265cbf770 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef CELL_VBUF_H +#define CELL_VBUF_H + + +struct cell_context; + +extern void +cell_init_vbuf(struct cell_context *cell); + + +#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c new file mode 100644 index 0000000000..9cba537d9e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -0,0 +1,346 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "../auxiliary/draw/draw_context.h" +#include "../auxiliary/draw/draw_private.h" + +#include "cell_context.h" +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Emit a 4x4 matrix transpose operation + * + * \param p         Function that the transpose operation is to be appended to + * \param row0      Register containing row 0 of the source matrix + * \param row1      Register containing row 1 of the source matrix + * \param row2      Register containing row 2 of the source matrix + * \param row3      Register containing row 3 of the source matrix + * \param dest_ptr  Register containing the address of the destination matrix + * \param shuf_ptr  Register containing the address of the shuffled data + * \param count     Number of colums to actually be written to the destination + * + * \note + * This function assumes that the registers named by \c row0, \c row1, + * \c row2, and \c row3 are scratch and can be modified by the generated code. + * Furthermore, these registers will be released, via calls to + * \c release_register, by this function. + *  + * \note + * This function requires that four temporary are available on entry. + */ +static void +emit_matrix_transpose(struct spe_function *p, +		      unsigned row0, unsigned row1, unsigned row2, +		      unsigned row3, unsigned dest_ptr, +		      unsigned shuf_ptr, unsigned count) +{ +   int shuf_hi = spe_allocate_available_register(p); +   int shuf_lo = spe_allocate_available_register(p); +   int t1 = spe_allocate_available_register(p); +   int t2 = spe_allocate_available_register(p); +   int t3; +   int t4; +   int col0; +   int col1; +   int col2; +   int col3; + + +   spe_lqd(p, shuf_hi, shuf_ptr, 3*16); +   spe_lqd(p, shuf_lo, shuf_ptr, 4*16); +   spe_shufb(p, t1, row0, row2, shuf_hi); +   spe_shufb(p, t2, row0, row2, shuf_lo); + + +   /* row0 and row2 are now no longer needed.  Re-use those registers as +    * temporaries. +    */ +   t3 = row0; +   t4 = row2; + +   spe_shufb(p, t3, row1, row3, shuf_hi); +   spe_shufb(p, t4, row1, row3, shuf_lo); + + +   /* row1 and row3 are now no longer needed.  Re-use those registers as +    * temporaries. 
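+    * (t1..t4 now hold the shuffled combinations of the row pairs; one more
+    * round of the same shuf_hi/shuf_lo shuffles below yields the four
+    * output columns.)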
+    */ +   col0 = row1; +   col1 = row3; + +   spe_shufb(p, col0, t1, t3, shuf_hi); +   if (count > 1) { +      spe_shufb(p, col1, t1, t3, shuf_lo); +   } + +   /* t1 and t3 are now no longer needed.  Re-use those registers as +    * temporaries. +    */ +   col2 = t1; +   col3 = t3; + +   if (count > 2) { +      spe_shufb(p, col2, t2, t4, shuf_hi); +   } + +   if (count > 3) { +      spe_shufb(p, col3, t2, t4, shuf_lo); +   } + + +   /* Store the results.  Remember that the stqd instruction is encoded using +    * the qword offset (stand-alone assemblers to the byte-offset to +    * qword-offset conversion for you), so the byte-offset needs be divided by +    * 16. +    */ +   switch (count) { +   case 4: +      spe_stqd(p, col3, dest_ptr, 3 * 16); +   case 3: +      spe_stqd(p, col2, dest_ptr, 2 * 16); +   case 2: +      spe_stqd(p, col1, dest_ptr, 1 * 16); +   case 1: +      spe_stqd(p, col0, dest_ptr, 0 * 16); +   } + + +   /* Release all of the temporary registers used. +    */ +   spe_release_register(p, col0); +   spe_release_register(p, col1); +   spe_release_register(p, col2); +   spe_release_register(p, col3); +   spe_release_register(p, shuf_hi); +   spe_release_register(p, shuf_lo); +   spe_release_register(p, t2); +   spe_release_register(p, t4); +} + + +#if 0 +/* This appears to not be used currently */ +static void +emit_fetch(struct spe_function *p, +	   unsigned in_ptr, unsigned *offset, +	   unsigned out_ptr, unsigned shuf_ptr, +	   enum pipe_format format) +{ +   const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) +       + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); +   const unsigned type = pf_type(format); +   const unsigned bytes = pf_size_x(format); + +   int v0 = spe_allocate_available_register(p); +   int v1 = spe_allocate_available_register(p); +   int v2 = spe_allocate_available_register(p); +   int v3 = spe_allocate_available_register(p); +   int tmp = spe_allocate_available_register(p); +   int float_zero = -1; +   int float_one = -1; +   float scale_signed = 0.0; +   float scale_unsigned = 0.0; + +   spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); +   spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); +   spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); +   spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); +   offset[0] += 4; +    +   switch (bytes) { +   case 1: +      scale_signed = 1.0f / 127.0f; +      scale_unsigned = 1.0f / 255.0f; +      spe_lqd(p, tmp, shuf_ptr, 1 * 16); +      spe_shufb(p, v0, v0, v0, tmp); +      spe_shufb(p, v1, v1, v1, tmp); +      spe_shufb(p, v2, v2, v2, tmp); +      spe_shufb(p, v3, v3, v3, tmp); +      break; +   case 2: +      scale_signed = 1.0f / 32767.0f; +      scale_unsigned = 1.0f / 65535.0f; +      spe_lqd(p, tmp, shuf_ptr, 2 * 16); +      spe_shufb(p, v0, v0, v0, tmp); +      spe_shufb(p, v1, v1, v1, tmp); +      spe_shufb(p, v2, v2, v2, tmp); +      spe_shufb(p, v3, v3, v3, tmp); +      break; +   case 4: +      scale_signed = 1.0f / 2147483647.0f; +      scale_unsigned = 1.0f / 4294967295.0f; +      break; +   default: +      assert(0); +      break; +   } + +   switch (type) { +   case PIPE_FORMAT_TYPE_FLOAT: +      break; +   case PIPE_FORMAT_TYPE_UNORM: +      spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); +      spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); +      spe_cuflt(p, v0, v0, 0); +      spe_fm(p, v0, v0, tmp); +      break; +   case PIPE_FORMAT_TYPE_SNORM: +      spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); +      spe_iohl(p, tmp, ((unsigned) scale_signed) & 
0x0ffff); +      spe_csflt(p, v0, v0, 0); +      spe_fm(p, v0, v0, tmp); +      break; +   case PIPE_FORMAT_TYPE_USCALED: +      spe_cuflt(p, v0, v0, 0); +      break; +   case PIPE_FORMAT_TYPE_SSCALED: +      spe_csflt(p, v0, v0, 0); +      break; +   } + + +   if (count < 4) { +      float_one = spe_allocate_available_register(p); +      spe_il(p, float_one, 1); +      spe_cuflt(p, float_one, float_one, 0); +       +      if (count < 3) { +	 float_zero = spe_allocate_available_register(p); +	 spe_il(p, float_zero, 0); +      } +   } + +   spe_release_register(p, tmp); + +   emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + +   switch (count) { +   case 1: +      spe_stqd(p, float_zero, out_ptr, 1 * 16); +   case 2: +      spe_stqd(p, float_zero, out_ptr, 2 * 16); +   case 3: +      spe_stqd(p, float_one, out_ptr, 3 * 16); +   } + +   if (float_zero != -1) { +      spe_release_register(p, float_zero); +   } + +   if (float_one != -1) { +      spe_release_register(p, float_one); +   } +} +#endif + + +void cell_update_vertex_fetch(struct draw_context *draw) +{ +#if 0 +   struct cell_context *const cell = +       (struct cell_context *) draw->driver_private; +   struct spe_function *p = &cell->attrib_fetch; +   unsigned function_index[PIPE_MAX_ATTRIBS]; +   unsigned unique_attr_formats; +   int out_ptr; +   int in_ptr; +   int shuf_ptr; +   unsigned i; +   unsigned j; + + +   /* Determine how many unique input attribute formats there are.  At the +    * same time, store the index of the lowest numbered attribute that has +    * the same format as any non-unique format. +    */ +   unique_attr_formats = 1; +   function_index[0] = 0; +   for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { +      const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; + +      for (j = 0; j < i; j++) { +	 if (curr_fmt == draw->vertex_element[j].src_format) { +	    break; +	 } +      } +       +      if (j == i) { +	 unique_attr_formats++; +      } + +      function_index[i] = j; +   } + + +   /* Each fetch function can be a maximum of 34 instructions (note: this is +    * actually a slight over-estimate). +    */ +   spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); + + +   /* Allocate registers for the function's input parameters. +    */ +   out_ptr = spe_allocate_register(p, 3); +   in_ptr = spe_allocate_register(p, 4); +   shuf_ptr = spe_allocate_register(p, 5); + + +   /* Generate code for the individual attribute fetch functions. +    */ +   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { +      unsigned offset; + +      if (function_index[i] == i) { +	 cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr  +						     - (void *) p->store); + +	 offset = 0; +	 emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, +		    draw->vertex_element[i].src_format); +	 spe_bi(p, 0, 0, 0); + +	 /* Round up to the next 16-byte boundary. +	  */ +	 if ((((unsigned) p->store) & 0x0f) != 0) { +	    const unsigned align = ((unsigned) p->store) & 0x0f; +	    p->store = (uint32_t *) (((void *) p->store) + align); +	 } +      } else { +	 /* Use the same function entry-point as a previously seen attribute +	  * with the same format. 
+	  */ +	 cell->attrib_fetch_offsets[i] =  +	     cell->attrib_fetch_offsets[function_index[i]]; +      } +   } +#else +   assert(0); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c new file mode 100644 index 0000000000..403cf6d50f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -0,0 +1,146 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cell_vertex_shader.c + * Vertex shader interface routines for Cell. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_batch.h" + +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. 
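+ * (The body below is currently compiled out with #if 0 and simply asserts.
+ * When enabled it emits the attribute-fetch code, per-array info, viewport
+ * and uniform state into a batch buffer, then sends CELL_CMD_VS_EXECUTE
+ * mailbox messages to SPU 0 in groups of SPU_VERTS_PER_BATCH vertices.)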
+ */ +void +cell_vertex_shader_queue_flush(struct draw_context *draw) +{ +#if 0 +   struct cell_context *const cell = +       (struct cell_context *) draw->driver_private; +   struct cell_command_vs *const vs = &cell_global.command[0].vs; +   uint64_t *batch; +   struct cell_array_info *array_info; +   unsigned i, j; +   struct cell_attribute_fetch_code *cf; + +   assert(draw->vs.queue_nr != 0); + +   /* XXX: do this on statechange:  +    */ +   draw_update_vertex_fetch(draw); +   cell_update_vertex_fetch(draw); + + +   batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); +   batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; +   cf = (struct cell_attribute_fetch_code *) (&batch[1]); +   cf->base = (uint64_t) cell->attrib_fetch.store; +   cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr  +				   - (void *) cell->attrib_fetch.store)); + + +   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { +      const enum pipe_format format = draw->vertex_element[i].src_format; +      const unsigned count = ((pf_size_x(format) != 0) +			      + (pf_size_y(format) != 0) +			      + (pf_size_z(format) != 0) +			      + (pf_size_w(format) != 0)); +      const unsigned size = pf_size_x(format) * count; + +      batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); + +      batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; + +      array_info = (struct cell_array_info *) &batch[1]; +      assert(draw->vertex_fetch.src_ptr[i] != NULL); +      array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; +      array_info->attr = i; +      array_info->pitch = draw->vertex_fetch.pitch[i]; +      array_info->size = size; +      array_info->function_offset = cell->attrib_fetch_offsets[i]; +   } + +   batch = cell_batch_alloc(cell, sizeof(batch[0]) +                            + sizeof(struct pipe_viewport_state)); +   batch[0] = CELL_CMD_STATE_VIEWPORT; +   (void) memcpy(&batch[1], &draw->viewport, +                 sizeof(struct pipe_viewport_state)); + +   { +      uint64_t uniforms = (uintptr_t) draw->user.constants; + +      batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); +      batch[0] = CELL_CMD_STATE_UNIFORMS; +      batch[1] = uniforms; +   } + +   cell_batch_flush(cell); + +   vs->opcode = CELL_CMD_VS_EXECUTE; +   vs->nr_attrs = draw->vertex_fetch.nr_attrs; + +   (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); +   vs->nr_planes = draw->nr_planes; + +   for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { +      const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); + +      for (j = 0; j < n; j++) { +         vs->elts[j] = draw->vs.queue[i + j].elt; +         vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; +      } + +      for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { +         vs->elts[j] = vs->elts[0]; +         vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; +      } + +      vs->num_elts = n; +      send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); + +      cell_flush_int(cell, CELL_FLUSH_WAIT); +   } + +   draw->vs.post_nr = draw->vs.queue_nr; +   draw->vs.queue_nr = 0; +#else +   assert(0); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h new file mode 100644 index 0000000000..ae2af5696b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.h @@ -0,0 +1,50 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef CELL_WINSYS_H +#define CELL_WINSYS_H + +#include "pipe/p_compiler.h" + + +/** + * Very simple winsys at this time. + * Will probably eventually add SPU control info. + */ +struct cell_winsys +{ +   uint preferredFormat; +}; + + +extern struct cell_winsys * +cell_get_winsys(uint format); + + + +#endif diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore new file mode 100644 index 0000000000..2be9a2d324 --- /dev/null +++ b/src/gallium/drivers/cell/spu/.gitignore @@ -0,0 +1 @@ +g3d_spu diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile new file mode 100644 index 0000000000..116453b79c --- /dev/null +++ b/src/gallium/drivers/cell/spu/Makefile @@ -0,0 +1,82 @@ +# Gallium3D Cell driver: SPU code + +# This makefile builds the g3d_spu.a file that's linked into the +# PPU code/library. + + +TOP = ../../../../.. 
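+# (five levels up: from src/gallium/drivers/cell/spu back to the top of the tree)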
+include $(TOP)/configs/current + + +PROG = g3d + +PROG_SPU = $(PROG)_spu +PROG_SPU_A = $(PROG)_spu.a +PROG_SPU_EMBED_O = $(PROG)_spu-embed.o + + +SOURCES = \ +	spu_command.c \ +	spu_dcache.c \ +	spu_funcs.c \ +	spu_main.c \ +	spu_per_fragment_op.c \ +	spu_render.c \ +	spu_texture.c \ +	spu_tile.c \ +	spu_tri.c + +OLD_SOURCES = \ +	spu_exec.c \ +	spu_util.c \ +	spu_vertex_fetch.c \ +	spu_vertex_shader.c + + +SPU_OBJECTS = $(SOURCES:.c=.o) \ + +SPU_ASM_OUT = $(SOURCES:.c=.s) \ + +INCLUDE_DIRS = \ +	-I$(TOP)/src/mesa \ +	-I$(TOP)/src/gallium/include \ +	-I$(TOP)/src/gallium/auxiliary \ +	-I$(TOP)/src/gallium/drivers + + +.c.o: +	$(SPU_CC) $(SPU_CFLAGS) -c $< + +.c.s: +	$(SPU_CC) $(SPU_CFLAGS) -O3 -S $< + + +# The .a file will be linked into the main/PPU executable +default: $(PROG_SPU_A) + +$(PROG_SPU_A): $(PROG_SPU_EMBED_O) +	$(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) + +$(PROG_SPU_EMBED_O): $(PROG_SPU) +	$(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) + +$(PROG_SPU): $(SPU_OBJECTS) +	$(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) + + + +asmfiles: $(SPU_ASM_OUT) + + +clean: +	rm -f *~ *.o *.a *.d *.s $(PROG_SPU) + + + +depend: $(SOURCES) +	rm -f depend +	touch depend +	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h new file mode 100644 index 0000000000..d7ce005524 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -0,0 +1,145 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + + +#ifndef SPU_COLORPACK_H +#define SPU_COLORPACK_H + + +#include <transpose_matrix4x4.h> +#include <spu_intrinsics.h> + + +static INLINE unsigned int +spu_pack_R8G8B8A8(vector float rgba) +{ +  vector unsigned int out = spu_convtu(rgba, 32); + +  out = spu_shuffle(out, out, ((vector unsigned char) { +                                  0, 4, 8, 12, 0, 0, 0, 0,  +                                  0, 0, 0, 0, 0, 0, 0, 0 }) ); + +  return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_A8R8G8B8(vector float rgba) +{ +  vector unsigned int out = spu_convtu(rgba, 32); +  out = spu_shuffle(out, out, ((vector unsigned char) { +                                  12, 0, 4, 8, 0, 0, 0, 0,  +                                  0, 0, 0, 0, 0, 0, 0, 0}) ); +  return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_B8G8R8A8(vector float rgba) +{ +  vector unsigned int out = spu_convtu(rgba, 32); +  out = spu_shuffle(out, out, ((vector unsigned char) { +                                  8, 4, 0, 12, 0, 0, 0, 0,  +                                  0, 0, 0, 0, 0, 0, 0, 0}) ); +  return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) +{ +  vector unsigned int out = spu_convtu(rgba, 32); +  out = spu_shuffle(out, out, shuffle); +  return spu_extract(out, 0); +} + + +static INLINE vector float +spu_unpack_B8G8R8A8(uint color) +{ +   vector unsigned int color_u4 = spu_splats(color); +   color_u4 = spu_shuffle(color_u4, color_u4, +                          ((vector unsigned char) { +                             2, 2, 2, 2, +                             1, 1, 1, 1, +                             0, 0, 0, 0, +                             3, 3, 3, 3}) ); +   return spu_convtf(color_u4, 32); +} + + +static INLINE vector float +spu_unpack_A8R8G8B8(uint color) +{ +   vector unsigned int color_u4 = spu_splats(color); +   color_u4 = spu_shuffle(color_u4, color_u4, +                          ((vector unsigned char) { +                             1, 1, 1, 1, +                             2, 2, 2, 2, +                             3, 3, 3, 3, +                             0, 0, 0, 0}) ); +   return spu_convtf(color_u4, 32); +} + + +/** + * \param color_in - array of 32-bit packed ARGB colors + * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order + */ +static INLINE void +spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4], +                               vector float color_out[4]) +{ +   vector unsigned int c0; + +   c0 = spu_shuffle(color_in[0], color_in[0], +                    ((vector unsigned char) { +                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) ); +   color_out[0] = spu_convtf(c0, 32); + +   c0 = spu_shuffle(color_in[1], color_in[1], +                    ((vector unsigned char) { +                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) ); +   color_out[1] = spu_convtf(c0, 32); + +   c0 = spu_shuffle(color_in[2], color_in[2], +                    ((vector unsigned char) { +                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) ); +   color_out[2] = spu_convtf(c0, 32); + +   c0 = spu_shuffle(color_in[3], color_in[3], +                    ((vector unsigned char) { +                       1, 1, 1, 1,  2, 2, 2, 2,  3, 3, 3, 3,  0, 0, 0, 0}) ); +   color_out[3] = spu_convtf(c0, 32); + +   _transpose_matrix4x4(color_out, 
color_out); +} + + + +#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c new file mode 100644 index 0000000000..5c0179d954 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -0,0 +1,815 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +/** + * SPU command processing code + */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_command.h" +#include "spu_main.h" +#include "spu_render.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" + + +struct spu_vs_context draw; + + +/** + * Buffers containing dynamically generated SPU code: + */ +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] +    ALIGN16_ATTRIB; + + + +static INLINE int +align(int value, int alignment) +{ +   return (value + alignment - 1) & ~(alignment - 1); +} + + + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ +   /* Evidently, using less than a 16-byte status doesn't work reliably */ +   static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, +                                              CELL_BUFFER_STATUS_FREE, +                                              CELL_BUFFER_STATUS_FREE, +                                              CELL_BUFFER_STATUS_FREE}; +   const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); +   uint *dst = spu.init.buffer_status + index; + +   ASSERT(buffer < CELL_NUM_BUFFERS); + +   mfc_put((void *) &status,    /* src in local memory */ +           (unsigned int) dst,  /* dst in main memory */ +           sizeof(status),      /* size */ +           TAG_MISC,            /* tag is unimportant */ +           0, /* tid */ +           0  /* rid */); +} + + +/** + * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. 
+ * There's a qword of status per SPU. + */ +static void +cmd_fence(struct cell_command_fence *fence_cmd) +{ +   static const vector unsigned int status = {CELL_FENCE_SIGNALLED, +                                              CELL_FENCE_SIGNALLED, +                                              CELL_FENCE_SIGNALLED, +                                              CELL_FENCE_SIGNALLED}; +   uint *dst = (uint *) fence_cmd->fence; +   dst += 4 * spu.init.id;  /* main store/memory address, not local store */ +   ASSERT_ALIGN16(dst); +   mfc_put((void *) &status,    /* src in local memory */ +           (unsigned int) dst,  /* dst in main memory */ +           sizeof(status),      /* size */ +           TAG_FENCE,           /* tag */ +           0, /* tid */ +           0  /* rid */); +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ +   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + +   if (clear->surface == 0) { +      spu.fb.color_clear_value = clear->value; +      if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { +         uint x = (spu.init.id << 4) | (spu.init.id << 12) | +            (spu.init.id << 20) | (spu.init.id << 28); +         spu.fb.color_clear_value ^= x; +      } +   } +   else { +      spu.fb.depth_clear_value = clear->value; +   } + +#define CLEAR_OPT 1 +#if CLEAR_OPT + +   /* Simply set all tiles' status to CLEAR. +    * When we actually begin rendering into a tile, we'll initialize it to +    * the clear value.  If any tiles go untouched during the frame, +    * really_clear_tiles() will set them to the clear value. +    */ +   if (clear->surface == 0) { +      memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); +   } +   else { +      memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); +   } + +#else + +   /* +    * This path clears the whole framebuffer to the clear color right now. +    */ + +   /* +   printf("SPU: %s num=%d w=%d h=%d\n", +          __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); +   */ + +   /* init a single tile to the clear value */ +   if (clear->surface == 0) { +      clear_c_tile(&spu.ctile); +   } +   else { +      clear_z_tile(&spu.ztile); +   } + +   /* walk over my tiles, writing the 'clear' tile's data */ +   { +      const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; +      uint i; +      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { +         uint tx = i % spu.fb.width_tiles; +         uint ty = i / spu.fb.width_tiles; +         if (clear->surface == 0) +            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); +         else +            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); +      } +   } + +   if (spu.init.debug_flags & CELL_DEBUG_SYNC) { +      wait_on_mask(1 << TAG_SURFACE_CLEAR); +   } + +#endif /* CLEAR_OPT */ + +   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ +   D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); +   ASSERT(release->vertex_buf != ~0U); +   release_buffer(release->vertex_buf); +} + + +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. 
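+ * (The handler below copies the depth/stencil/alpha and blend state for the
+ * fallback path, grows a plain-malloc'd local code buffer when needed --
+ * align_malloc() isn't available in SPU code -- copies the generated code
+ * into it, and points the front- and back-facing fragment_ops function
+ * pointers at their respective offsets within that code.)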
+ */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ +   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); + +   /* Copy state info (for fallback case only - this will eventually +    * go away when the fallback case goes away) +    */ +   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); +   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); +   memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); + +   /* Make sure the SPU knows which buffers it's expected to read when +    * it's told to pull tiles. +    */ +   spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); + +   /* If we're forcing the fallback code to be used (for debug purposes), +    * install that.  Otherwise install the incoming SPU code. +    */ +   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { +      static unsigned int warned = 0; +      if (!warned) { +         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); +         warned = 1; +      } +      /* The following two lines aren't really necessary if you +       * know the debug flags won't change during a run, and if you +       * know that the function pointers are initialized correctly. +       * We set them here to allow a person to change the debug +       * flags during a run (from inside a debugger). +       */ +      spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; +      spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; +      return; +   } + +   /* Make sure the SPU code buffer is large enough to hold the incoming code. +    * Note that we *don't* use align_malloc() and align_free(), because +    * those utility functions are *not* available in SPU code. +    * */ +   if (spu.fragment_ops_code_size < fops->total_code_size) { +      if (spu.fragment_ops_code != NULL) { +         free(spu.fragment_ops_code); +      } +      spu.fragment_ops_code_size = fops->total_code_size; +      spu.fragment_ops_code = malloc(fops->total_code_size); +      if (spu.fragment_ops_code == NULL) { +         /* Whoops. */ +         fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); +         spu.fragment_ops_code = NULL; +         spu.fragment_ops_code_size = 0; +         spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; +         spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; +         return; +      } +   } + +   /* Copy the SPU code from the command buffer to the spu buffer */ +   memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); + +   /* Set the pointers for the front-facing and back-facing fragments +    * to the specified offsets within the code.  Note that if the +    * front-facing and back-facing code are the same, they'll have +    * the same offset. 
+    */ +   spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; +   spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; +} + +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ +   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); +   /* Copy SPU code from batch buffer to spu buffer */ +   memcpy(spu.fragment_program_code, fp->code, +          SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 +   /* Point function pointer at new code */ +   spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + +static uint +cmd_state_fs_constants(const qword *buffer, uint pos) +{ +   const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0); +   const float *constants = (const float *) &buffer[pos+2]; +   uint i; + +   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + +   /* Expand each float to float[4] for SOA execution */ +   for (i = 0; i < num_const; i++) { +      D_PRINTF(CELL_DEBUG_CMD, "  const[%u] = %f\n", i, constants[i]); +      spu.constants[i] = spu_splats(constants[i]); +   } + +   /* return new buffer pos (in 16-byte words) */ +   return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16); +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ +   D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x  zformat 0x%x\n", +             cmd->width, +             cmd->height, +             cmd->color_start, +             cmd->color_format, +             cmd->depth_format); + +   ASSERT_ALIGN16(cmd->color_start); +   ASSERT_ALIGN16(cmd->depth_start); + +   spu.fb.color_start = cmd->color_start; +   spu.fb.depth_start = cmd->depth_start; +   spu.fb.color_format = cmd->color_format; +   spu.fb.depth_format = cmd->depth_format; +   spu.fb.width = cmd->width; +   spu.fb.height = cmd->height; +   spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; +   spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + +   switch (spu.fb.depth_format) { +   case PIPE_FORMAT_Z32_UNORM: +      spu.fb.zsize = 4; +      spu.fb.zscale = (float) 0xffffffffu; +      break; +   case PIPE_FORMAT_Z24S8_UNORM: +   case PIPE_FORMAT_S8Z24_UNORM: +   case PIPE_FORMAT_Z24X8_UNORM: +   case PIPE_FORMAT_X8Z24_UNORM: +      spu.fb.zsize = 4; +      spu.fb.zscale = (float) 0x00ffffffu; +      break; +   case PIPE_FORMAT_Z16_UNORM: +      spu.fb.zsize = 2; +      spu.fb.zscale = (float) 0xffffu; +      break; +   default: +      spu.fb.zsize = 0; +      break; +   } +} + + +/** + * Tex texture mask_s/t and scale_s/t fields depend on the texture size and + * sampler wrap modes. 
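+ * (For PIPE_TEX_WRAP_REPEAT the mask is set to size - 1 so wrapping can be
+ * done with a bitwise AND, which presumes power-of-two level sizes; other
+ * wrap modes get an all-ones mask.  With normalized coordinates the scale
+ * is the level's width/height, converting s/t to texel units; otherwise
+ * the scale is 1.0.)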
+ */ +static void +update_tex_masks(struct spu_texture *texture, +                 const struct pipe_sampler_state *sampler) +{ +   uint i; + +   for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { +      int width = texture->level[i].width; +      int height = texture->level[i].height; + +      if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) +         texture->level[i].mask_s = spu_splats(width - 1); +      else +         texture->level[i].mask_s = spu_splats(~0); + +      if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) +         texture->level[i].mask_t = spu_splats(height - 1); +      else +         texture->level[i].mask_t = spu_splats(~0); + +      if (sampler->normalized_coords) { +         texture->level[i].scale_s = spu_splats((float) width); +         texture->level[i].scale_t = spu_splats((float) height); +      } +      else { +         texture->level[i].scale_s = spu_splats(1.0f); +         texture->level[i].scale_t = spu_splats(1.0f); +      } +   } +} + + +static void +cmd_state_sampler(const struct cell_command_sampler *sampler) +{ +   uint unit = sampler->unit; + +   D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); + +   spu.sampler[unit] = sampler->state; + +   switch (spu.sampler[unit].min_img_filter) { +   case PIPE_TEX_FILTER_LINEAR: +      spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; +      break; +   case PIPE_TEX_FILTER_ANISO: +      /* fall-through, for now */ +   case PIPE_TEX_FILTER_NEAREST: +      spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; +      break; +   default: +      ASSERT(0); +   } + +   switch (spu.sampler[sampler->unit].mag_img_filter) { +   case PIPE_TEX_FILTER_LINEAR: +      spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; +      break; +   case PIPE_TEX_FILTER_ANISO: +      /* fall-through, for now */ +   case PIPE_TEX_FILTER_NEAREST: +      spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; +      break; +   default: +      ASSERT(0); +   } + +   switch (spu.sampler[sampler->unit].min_mip_filter) { +   case PIPE_TEX_MIPFILTER_NEAREST: +   case PIPE_TEX_MIPFILTER_LINEAR: +      spu.sample_texture_2d[unit] = sample_texture_2d_lod; +      break; +   case PIPE_TEX_MIPFILTER_NONE: +      spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; +      break; +   default: +      ASSERT(0); +   } + +   update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ +   const uint unit = texture->unit; +   uint i; + +   D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); + +   spu.texture[unit].max_level = 0; +   spu.texture[unit].target = texture->target; + +   for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { +      uint width = texture->width[i]; +      uint height = texture->height[i]; +      uint depth = texture->depth[i]; + +      D_PRINTF(CELL_DEBUG_CMD, "  LEVEL %u: at %p  size[0] %u x %u\n", i, +             texture->start[i], texture->width[i], texture->height[i]); + +      spu.texture[unit].level[i].start = texture->start[i]; +      spu.texture[unit].level[i].width = width; +      spu.texture[unit].level[i].height = height; +      spu.texture[unit].level[i].depth = depth; + +      spu.texture[unit].level[i].tiles_per_row = +         (width + TILE_SIZE - 1) / TILE_SIZE; + +      spu.texture[unit].level[i].bytes_per_image = +         4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; + +      spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); +      spu.texture[unit].level[i].max_t = 
spu_splats((int) height - 1); + +      if (texture->start[i]) +         spu.texture[unit].max_level = i; +   } + +   update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ +   D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); +   ASSERT(vinfo->num_attribs >= 1); +   ASSERT(vinfo->num_attribs <= 8); +   memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ +   const unsigned attr = vs_info->attr; + +   ASSERT(attr < PIPE_MAX_ATTRIBS); +   draw.vertex_fetch.src_ptr[attr] = vs_info->base; +   draw.vertex_fetch.pitch[attr] = vs_info->pitch; +   draw.vertex_fetch.size[attr] = vs_info->size; +   draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; +   draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ +   mfc_get(attribute_fetch_code_buffer, +           (unsigned int) code->base,  /* src */ +           code->size, +           TAG_BATCH_BUFFER, +           0, /* tid */ +           0  /* rid */); +   wait_on_mask(1 << TAG_BATCH_BUFFER); + +   draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + +static void +cmd_finish(void) +{ +   D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); +   really_clear_tiles(0); +   /* wait for all outstanding DMAs to finish */ +   mfc_write_tag_mask(~0); +   mfc_read_tag_status_all(); +   /* send mbox message to PPU */ +   spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * + * The opcode param encodes the location of the buffer and its size. 
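+ * (Bits 0..7 hold CELL_CMD_BATCH, bits 8..15 the batch buffer index, and
+ * bits 16..31 the payload size in bytes.  The buffer is DMA'd into local
+ * store with mfc_get(), released back to the PPU, and then decoded one
+ * 16-byte qword at a time in the loop below.)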
+ */ +static void +cmd_batch(uint opcode) +{ +   const uint buf = (opcode >> 8) & 0xff; +   uint size = (opcode >> 16); +   qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; +   const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); +   uint pos; + +   D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", +             buf, size, spu.init.buffers[buf]); + +   ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + +   ASSERT_ALIGN16(spu.init.buffers[buf]); + +   size = ROUNDUP16(size); + +   ASSERT_ALIGN16(spu.init.buffers[buf]); + +   mfc_get(buffer,  /* dest */ +           (unsigned int) spu.init.buffers[buf],  /* src */ +           size, +           TAG_BATCH_BUFFER, +           0, /* tid */ +           0  /* rid */); +   wait_on_mask(1 << TAG_BATCH_BUFFER); + +   /* Tell PPU we're done copying the buffer to local store */ +   D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); +   release_buffer(buf); + +   /* +    * Loop over commands in the batch buffer +    */ +   for (pos = 0; pos < usize; /* no incr */) { +      switch (si_to_uint(buffer[pos])) { +      /* +       * rendering commands +       */ +      case CELL_CMD_CLEAR_SURFACE: +         { +            struct cell_command_clear_surface *clr +               = (struct cell_command_clear_surface *) &buffer[pos]; +            cmd_clear_surface(clr); +            pos += sizeof(*clr) / 16; +         } +         break; +      case CELL_CMD_RENDER: +         { +            struct cell_command_render *render +               = (struct cell_command_render *) &buffer[pos]; +            uint pos_incr; +            cmd_render(render, &pos_incr); +            pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return +         } +         break; +      /* +       * state-update commands +       */ +      case CELL_CMD_STATE_FRAMEBUFFER: +         { +            struct cell_command_framebuffer *fb +               = (struct cell_command_framebuffer *) &buffer[pos]; +            cmd_state_framebuffer(fb); +            pos += sizeof(*fb) / 16; +         } +         break; +      case CELL_CMD_STATE_FRAGMENT_OPS: +         { +            struct cell_command_fragment_ops *fops +               = (struct cell_command_fragment_ops *) &buffer[pos]; +            cmd_state_fragment_ops(fops); +            /* This is a variant-sized command */ +            pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16; +         } +         break; +      case CELL_CMD_STATE_FRAGMENT_PROGRAM: +         { +            struct cell_command_fragment_program *fp +               = (struct cell_command_fragment_program *) &buffer[pos]; +            cmd_state_fragment_program(fp); +            pos += sizeof(*fp) / 16; +         } +         break; +      case CELL_CMD_STATE_FS_CONSTANTS: +         pos = cmd_state_fs_constants(buffer, pos); +         break; +      case CELL_CMD_STATE_RASTERIZER: +         { +            struct cell_command_rasterizer *rast = +               (struct cell_command_rasterizer *) &buffer[pos]; +            spu.rasterizer = rast->rasterizer; +            pos += sizeof(*rast) / 16; +         } +         break; +      case CELL_CMD_STATE_SAMPLER: +         { +            struct cell_command_sampler *sampler +               = (struct cell_command_sampler *) &buffer[pos]; +            cmd_state_sampler(sampler); +            pos += sizeof(*sampler) / 16; +         } +         break; +      case CELL_CMD_STATE_TEXTURE: +         { +            struct cell_command_texture *texture +               = (struct 
cell_command_texture *) &buffer[pos]; +            cmd_state_texture(texture); +            pos += sizeof(*texture) / 16; +         } +         break; +      case CELL_CMD_STATE_VERTEX_INFO: +         cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); +         pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16; +         break; +      case CELL_CMD_STATE_VIEWPORT: +         (void) memcpy(& draw.viewport, &buffer[pos+1], +                       sizeof(struct pipe_viewport_state)); +         pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16; +         break; +      case CELL_CMD_STATE_UNIFORMS: +         draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0); +         pos += 2; +         break; +      case CELL_CMD_STATE_VS_ARRAY_INFO: +         cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); +         pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16; +         break; +      case CELL_CMD_STATE_BIND_VS: +#if 0 +         spu_bind_vertex_shader(&draw, +                                (struct cell_shader_info *) &buffer[pos+1]); +#endif +         pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16; +         break; +      case CELL_CMD_STATE_ATTRIB_FETCH: +         cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) +                                &buffer[pos+1]); +         pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16; +         break; +      /* +       * misc commands +       */ +      case CELL_CMD_FINISH: +         cmd_finish(); +         pos += 1; +         break; +      case CELL_CMD_FENCE: +         { +            struct cell_command_fence *fence_cmd = +               (struct cell_command_fence *) &buffer[pos]; +            cmd_fence(fence_cmd); +            pos += sizeof(*fence_cmd) / 16; +         } +         break; +      case CELL_CMD_RELEASE_VERTS: +         { +            struct cell_command_release_verts *release +               = (struct cell_command_release_verts *) &buffer[pos]; +            cmd_release_verts(release); +            pos += sizeof(*release) / 16; +         } +         break; +      case CELL_CMD_FLUSH_BUFFER_RANGE: { +	 struct cell_buffer_range *br = (struct cell_buffer_range *) +	     &buffer[pos+1]; + +	 spu_dcache_mark_dirty((unsigned) br->base, br->size); +         pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16; +	 break; +      } +      default: +         printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos])); +         ASSERT(0); +         break; +      } +   } + +   D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); +} + + +#define PERF 0 + + +/** + * Main loop for SPEs: Get a command, execute it, repeat. 
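+ *
+ * Each iteration blocks on the inbound mailbox, masks the low byte of the
+ * received word with CELL_CMD_OPCODE_MASK and dispatches on it:
+ * CELL_CMD_EXIT ends the loop, CELL_CMD_BATCH DMAs in and executes a
+ * batch buffer via cmd_batch(), and anything else is reported as an
+ * error.  When PERF is non-zero, the SPU decrementer is sampled around
+ * each command to print the mailbox-wait and batch-execution times.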
+ */ +void +command_loop(void) +{ +   int exitFlag = 0; +   uint t0, t1; + +   D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); + +   while (!exitFlag) { +      unsigned opcode; + +      D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); + +      if (PERF) +         spu_write_decrementer(~0); + +      /* read/wait from mailbox */ +      opcode = (unsigned int) spu_read_in_mbox(); +      D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); + +      if (PERF) +         t0 = spu_read_decrementer(); + +      switch (opcode & CELL_CMD_OPCODE_MASK) { +      case CELL_CMD_EXIT: +         D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); +         exitFlag = 1; +         break; +      case CELL_CMD_VS_EXECUTE: +#if 0 +         spu_execute_vertex_shader(&draw, &cmd.vs); +#endif +         break; +      case CELL_CMD_BATCH: +         cmd_batch(opcode); +         break; +      default: +         printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); +      } + +      if (PERF) { +         t1 = spu_read_decrementer(); +         printf("wait mbox time: %gms   batch time: %gms\n", +                (~0u - t0) * spu.init.inv_timebase, +                (t0 - t1) * spu.init.inv_timebase); +      } +   } + +   D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); + +   if (spu.init.debug_flags & CELL_DEBUG_CACHE) +      spu_dcache_report(); +} + +/* Initialize this module; we manage the fragment ops buffer here. */ +void +spu_command_init(void) +{ +   /* Install default/fallback fragment processing function. +    * This will normally be overriden by a code-gen'd function +    * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. +    */ +   spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; +   spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + +   /* Set up the basic empty buffer for code-gen'ed fragment ops */ +   spu.fragment_ops_code = NULL; +   spu.fragment_ops_code_size = 0; +} + +void +spu_command_close(void) +{ +   /* Deallocate the code-gen buffer for fragment ops, and reset the +    * fragment ops functions to their initial setting (just to leave +    * things in a good state). +    */ +   if (spu.fragment_ops_code != NULL) { +      free(spu.fragment_ops_code); +   } +   spu_command_init(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h new file mode 100644 index 0000000000..83dcdade28 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -0,0 +1,35 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +extern void +command_loop(void); + +extern void +spu_command_init(void); + +extern void +spu_command_close(void); diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c new file mode 100644 index 0000000000..a6d67634fd --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -0,0 +1,145 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_dcache.h" + +#define CACHELINE_LOG2SIZE    7 +#define LINE_SIZE             (1U << 7) +#define ALIGN_MASK            (~(LINE_SIZE - 1)) + +#define CACHE_NAME            data +#define CACHED_TYPE           qword +#define CACHE_TYPE            CACHE_TYPE_RO +#define CACHE_SET_TAGID(set)  (((set) & 0x03) + TAG_DCACHE0) +#define CACHE_LOG2NNWAY       2 +#define CACHE_LOG2NSETS       6 +#ifdef DEBUG +#define CACHE_STATS           1 +#endif +#include <cache-api.h> + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME            data +#define CACHE_NWAY            4 +#define CACHE_NSETS           (1U << 6) + + +/** + * Fetch between arbitrary number of bytes from an unaligned address + * + * \param dst   Destination data buffer + * \param ea    Main memory effective address of source data + * \param size  Number of bytes to read + * + * \warning + * As is hinted by the type of the \c dst pointer, this function writes + * multiples of 16-bytes. + */ +void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ +   const int shift = ea & 0x0f; +   const unsigned read_size = ROUNDUP16(size + shift); +   const unsigned last_read = ROUNDUP16(ea + size); +   const qword *const last_write = dst + (ROUNDUP16(size) / 16); +   unsigned i; + + +   if (shift == 0) { +      /* Data is already aligned.  Fetch directly into the destination buffer. 
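+       * Each cache_rd() returns one aligned 16-byte qword from the
+       * software cache, so this loop writes ROUNDUP16(size) bytes to dst
+       * (hence the \warning above about writing multiples of 16 bytes).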
+       */ +      for (i = 0; i < size; i += 16) { +         *(dst++) = cache_rd(data, ea + i); +      } +   } else { +      qword hi; + + +      /* Please exercise extreme caution when modifying this code.  This code +       * must not read past the end of the page containing the source data, +       * and it must not write more than ((size + 15) / 16) qwords to the +       * destination buffer. +       */ +      ea &= ~0x0f; +      hi = cache_rd(data, ea); +      for (i = 16; i < read_size; i += 16) { +         qword lo = cache_rd(data, ea + i); + +         *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), +                          (qword) spu_rlmaskqwbyte(lo, shift - 16)); +         hi = lo; +      } + +      if (dst != last_write) { +         *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); +      } +   } +    +   ASSERT((ea + i) == last_read); +   ASSERT(dst == last_write); +} + + +/** + * Notify the cache that a range of main memory may have been modified + */ +void +spu_dcache_mark_dirty(unsigned ea, unsigned size) +{ +   unsigned i; +   const unsigned aligned_start = (ea & ALIGN_MASK); +   const unsigned aligned_end = (ea + size + (LINE_SIZE - 1))  +       & ALIGN_MASK; + + +   for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { +      const unsigned entry = __cache_dir[i]; +      const unsigned addr = entry & ~0x0f; + +      __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) +          ? (entry & ~CACHELINE_VALID) : entry; +   } +} + + +/** + * Print cache utilization report + */ +void +spu_dcache_report(void) +{ +#ifdef CACHE_STATS +   if (spu.init.id == 0) { +      printf("SPU 0: Texture cache report:\n"); +      cache_pr_stats(data); +   } +#endif +} + + diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h new file mode 100644 index 0000000000..39a19eb31b --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.h @@ -0,0 +1,37 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SPU_DCACHE_H +#define SPU_DCACHE_H + +extern void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); + +extern void +spu_dcache_mark_dirty(unsigned ea, unsigned size); + +extern void +spu_dcache_report(void); + +#endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c new file mode 100644 index 0000000000..e27df2dfb3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -0,0 +1,1933 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers.  This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 
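+ *
+ * Worked example: if an IF condition is true only for pixels 0 and 2 of
+ * the quad, the IF handler clears bits 1 and 3 of CondMask, leaving 0x5.
+ * With LoopMask, ContMask and FuncMask still 0xf, UPDATE_EXEC_MASK()
+ * gives ExecMask = 0x5, so store_dest() writes only channels 0 and 2.
+ * ELSE then sets CondMask = ~CondMask & (the mask saved on CondStack),
+ * i.e. 0xa here, and ENDIF pops the saved mask back off the stack.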
+ * + * + * Authors: + *   Michal Krol + *   Brian Paul + */ + +#include <transpose_matrix4x4.h> +#include <simdmath/ceilf4.h> +#include <simdmath/cosf4.h> +#include <simdmath/divf4.h> +#include <simdmath/floorf4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> +#include <simdmath/sinf4.h> +#include <simdmath/sqrtf4.h> +#include <simdmath/truncf4.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "spu_exec.h" +#include "spu_main.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" + +#define TILE_TOP_LEFT     0 +#define TILE_TOP_RIGHT    1 +#define TILE_BOTTOM_LEFT  2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I           TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C           TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I          TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C          TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I          TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C          TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I          TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C          TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I           TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C           TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I           TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C           TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I         TGSI_EXEC_TEMP_128_I +#define TEMP_128_C         TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I        TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C        TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0            TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ +   for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ +   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ +   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ +   FOR_EACH_CHANNEL( CHAN )\ +      if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ +   FOR_EACH_CHANNEL( CHAN )\ +      if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ +      MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X  0 +#define CHAN_Y  1 +#define CHAN_Z  2 +#define CHAN_W  3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call spu_exec_machine_run() many times. + */ +void +spu_exec_machine_init(struct spu_exec_machine *mach, +                      uint numSamplers, +                      struct spu_sampler *samplers, +                      unsigned processor) +{ +   const qword zero = si_il(0); +   const qword not_zero = si_il(~0); + +   (void) numSamplers; +   mach->Samplers = samplers; +   mach->Processor = processor; +   mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + +   /* Setup constants. 
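+    * The integer temps are set to the 0x00000000, 0xFFFFFFFF, 0x7FFFFFFF
+    * and 0x80000000 patterns named by their macros; the float temps hold
+    * 1.0, 2.0, 128.0 and -128.0, which the LIT, EX2 and RCP handlers
+    * below rely on.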
*/ +   mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; +   mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; +   mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); +   mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); + +   mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); +   mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); +   mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); +   mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); +} + + +static INLINE qword +micro_abs(qword src) +{ +   return si_rotmi(si_shli(src, 1), -1); +} + +static INLINE qword +micro_ceil(qword src) +{ +   return (qword) _ceilf4((vec_float4) src); +} + +static INLINE qword +micro_cos(qword src) +{ +   return (qword) _cosf4((vec_float4) src); +} + +static const qword br_shuf = { +   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, +   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, +   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, +   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +   TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, +   TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +}; + +static const qword bl_shuf = { +   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, +   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, +   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, +   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +   TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, +   TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +}; + +static const qword tl_shuf = { +   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, +   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, +   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, +   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +   TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, +   TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +}; + +static qword +micro_ddx(qword src) +{ +   qword bottom_right = si_shufb(src, src, br_shuf); +   qword bottom_left = si_shufb(src, src, bl_shuf); + +   return si_fs(bottom_right, bottom_left); +} + +static qword +micro_ddy(qword src) +{ +   qword top_left = si_shufb(src, src, tl_shuf); +   qword bottom_left = si_shufb(src, src, bl_shuf); + +   return si_fs(top_left, bottom_left); +} + +static INLINE qword +micro_div(qword src0, qword src1) +{ +   return (qword) _divf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_flr(qword src) +{ +   return (qword) _floorf4((vec_float4) src); +} + +static qword +micro_frc(qword src) +{ +   return si_fs(src, (qword) _floorf4((vec_float4) src)); +} + +static INLINE qword +micro_ge(qword src0, qword src1) +{ +   return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); +} + +static qword +micro_lg2(qword src) +{ +   return (qword) _log2f4((vec_float4) src); +} + +static INLINE qword +micro_lt(qword src0, qword src1) +{ +   const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); + +   return si_xori(tmp, 0xff); +} + +static INLINE qword +micro_max(qword src0, qword src1) +{ +   return si_selb(src1, src0, si_fcgt(src0, src1)); +} + +static INLINE qword +micro_min(qword src0, qword src1) +{ +   return si_selb(src0, src1, si_fcgt(src0, src1)); +} + +static qword +micro_neg(qword src) +{ +   return si_xor(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_set_sign(qword src) +{ +   
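+   /* force the IEEE sign bit on; used by fetch_source() for the
+    * TGSI_UTIL_SIGN_SET source modifier
+    */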
return si_or(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_pow(qword src0, qword src1) +{ +   return (qword) _powf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_rnd(qword src) +{ +   const qword half = (qword) spu_splats(0.5f); + +   /* May be able to use _roundf4.  There may be some difference, though. +    */ +   return (qword) _floorf4((vec_float4) si_fa(src, half)); +} + +static INLINE qword +micro_ishr(qword src0, qword src1) +{ +   return si_rotma(src0, si_sfi(src1, 0)); +} + +static qword +micro_trunc(qword src) +{ +   return (qword) _truncf4((vec_float4) src); +} + +static qword +micro_sin(qword src) +{ +   return (qword) _sinf4((vec_float4) src); +} + +static INLINE qword +micro_sqrt(qword src) +{ +   return (qword) _sqrtf4((vec_float4) src); +} + +static void +fetch_src_file_channel( +   const struct spu_exec_machine *mach, +   const uint file, +   const uint swizzle, +   const union spu_exec_channel *index, +   union spu_exec_channel *chan ) +{ +   switch( swizzle ) { +   case TGSI_EXTSWIZZLE_X: +   case TGSI_EXTSWIZZLE_Y: +   case TGSI_EXTSWIZZLE_Z: +   case TGSI_EXTSWIZZLE_W: +      switch( file ) { +      case TGSI_FILE_CONSTANT: { +         unsigned i; + +         for (i = 0; i < 4; i++) { +            const float *ptr = mach->Consts[index->i[i]]; +            float tmp[4]; + +            spu_dcache_fetch_unaligned((qword *) tmp, +                                       (uintptr_t)(ptr + swizzle), +                                       sizeof(float)); + +            chan->f[i] = tmp[0]; +         } +         break; +      } + +      case TGSI_FILE_INPUT: +         chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; +         chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; +         chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; +         chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; +         break; + +      case TGSI_FILE_TEMPORARY: +         chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; +         chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; +         chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; +         chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; +         break; + +      case TGSI_FILE_IMMEDIATE: +         ASSERT( index->i[0] < (int) mach->ImmLimit ); +         ASSERT( index->i[1] < (int) mach->ImmLimit ); +         ASSERT( index->i[2] < (int) mach->ImmLimit ); +         ASSERT( index->i[3] < (int) mach->ImmLimit ); + +         chan->f[0] = mach->Imms[index->i[0]][swizzle]; +         chan->f[1] = mach->Imms[index->i[1]][swizzle]; +         chan->f[2] = mach->Imms[index->i[2]][swizzle]; +         chan->f[3] = mach->Imms[index->i[3]][swizzle]; +         break; + +      case TGSI_FILE_ADDRESS: +         chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; +         chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; +         chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; +         chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; +         break; + +      case TGSI_FILE_OUTPUT: +         /* vertex/fragment output vars can be read too */ +         chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; +         chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; +         chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; +         chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; +         break; + +      default: +         ASSERT( 0 ); +      } +      break; + +   case 
TGSI_EXTSWIZZLE_ZERO: +      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; +      break; + +   case TGSI_EXTSWIZZLE_ONE: +      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; +      break; + +   default: +      ASSERT( 0 ); +   } +} + +static void +fetch_source( +   const struct spu_exec_machine *mach, +   union spu_exec_channel *chan, +   const struct tgsi_full_src_register *reg, +   const uint chan_index ) +{ +   union spu_exec_channel index; +   uint swizzle; + +   index.i[0] = +   index.i[1] = +   index.i[2] = +   index.i[3] = reg->SrcRegister.Index; + +   if (reg->SrcRegister.Indirect) { +      union spu_exec_channel index2; +      union spu_exec_channel indir_index; + +      index2.i[0] = +      index2.i[1] = +      index2.i[2] = +      index2.i[3] = reg->SrcRegisterInd.Index; + +      swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, +                                                   CHAN_X); +      fetch_src_file_channel( +         mach, +         reg->SrcRegisterInd.File, +         swizzle, +         &index2, +         &indir_index ); + +      index.q = si_a(index.q, indir_index.q); +   } + +   if( reg->SrcRegister.Dimension ) { +      switch( reg->SrcRegister.File ) { +      case TGSI_FILE_INPUT: +         index.q = si_mpyi(index.q, 17); +         break; +      case TGSI_FILE_CONSTANT: +         index.q = si_shli(index.q, 12); +         break; +      default: +         ASSERT( 0 ); +      } + +      index.i[0] += reg->SrcRegisterDim.Index; +      index.i[1] += reg->SrcRegisterDim.Index; +      index.i[2] += reg->SrcRegisterDim.Index; +      index.i[3] += reg->SrcRegisterDim.Index; + +      if (reg->SrcRegisterDim.Indirect) { +         union spu_exec_channel index2; +         union spu_exec_channel indir_index; + +         index2.i[0] = +         index2.i[1] = +         index2.i[2] = +         index2.i[3] = reg->SrcRegisterDimInd.Index; + +         swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); +         fetch_src_file_channel( +            mach, +            reg->SrcRegisterDimInd.File, +            swizzle, +            &index2, +            &indir_index ); + +         index.q = si_a(index.q, indir_index.q); +      } +   } + +   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); +   fetch_src_file_channel( +      mach, +      reg->SrcRegister.File, +      swizzle, +      &index, +      chan ); + +   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { +   case TGSI_UTIL_SIGN_CLEAR: +      chan->q = micro_abs(chan->q); +      break; + +   case TGSI_UTIL_SIGN_SET: +      chan->q = micro_set_sign(chan->q); +      break; + +   case TGSI_UTIL_SIGN_TOGGLE: +      chan->q = micro_neg(chan->q); +      break; + +   case TGSI_UTIL_SIGN_KEEP: +      break; +   } + +   if (reg->SrcRegisterExtMod.Complement) { +      chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); +   } +} + +static void +store_dest( +   struct spu_exec_machine *mach, +   const union spu_exec_channel *chan, +   const struct tgsi_full_dst_register *reg, +   const struct tgsi_full_instruction *inst, +   uint chan_index ) +{ +   union spu_exec_channel *dst; + +   switch( reg->DstRegister.File ) { +   case TGSI_FILE_NULL: +      return; + +   case TGSI_FILE_OUTPUT: +      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] +                           + reg->DstRegister.Index].xyzw[chan_index]; +      break; + +   case TGSI_FILE_TEMPORARY: +      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 
+      break; + +   case TGSI_FILE_ADDRESS: +      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; +      break; + +   default: +      ASSERT( 0 ); +      return; +   } + +   switch (inst->Instruction.Saturate) +   { +   case TGSI_SAT_NONE: +      if (mach->ExecMask & 0x1) +         dst->i[0] = chan->i[0]; +      if (mach->ExecMask & 0x2) +         dst->i[1] = chan->i[1]; +      if (mach->ExecMask & 0x4) +         dst->i[2] = chan->i[2]; +      if (mach->ExecMask & 0x8) +         dst->i[3] = chan->i[3]; +      break; + +   case TGSI_SAT_ZERO_ONE: +      /* XXX need to obey ExecMask here */ +      dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); +      dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); +      break; + +   case TGSI_SAT_MINUS_PLUS_ONE: +      ASSERT( 0 ); +      break; + +   default: +      ASSERT( 0 ); +   } +} + +#define FETCH(VAL,INDEX,CHAN)\ +    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ +    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kil(struct spu_exec_machine *mach, +         const struct tgsi_full_instruction *inst) +{ +   uint uniquemask; +   uint chan_index; +   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ +   union spu_exec_channel r[1]; + +   /* This mask stores component bits that were already tested. Note that +    * we test if the value is less than zero, so 1.0 and 0.0 need not to be +    * tested. */ +   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + +   for (chan_index = 0; chan_index < 4; chan_index++) +   { +      uint swizzle; +      uint i; + +      /* unswizzle channel */ +      swizzle = tgsi_util_get_full_src_register_extswizzle ( +                        &inst->FullSrcRegisters[0], +                        chan_index); + +      /* check if the component has not been already tested */ +      if (uniquemask & (1 << swizzle)) +         continue; +      uniquemask |= 1 << swizzle; + +      FETCH(&r[0], 0, chan_index); +      for (i = 0; i < 4; i++) +         if (r[0].f[i] < 0.0f) +            kilmask |= 1 << i; +   } + +   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, +          const struct tgsi_full_instruction *inst) +{ +   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + +   /* TODO: build kilmask from CC mask */ + +   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +/* + * Fetch a texel using STR texture coordinates. 
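+ *
+ * The sampler's get_samples() call returns four RGBA texels in AoS
+ * order; they are transposed to SoA below so that r/g/b/a each hold one
+ * color channel for all four fragments of the quad.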
+ */
+static void
+fetch_texel( struct spu_sampler *sampler,
+             const union spu_exec_channel *s,
+             const union spu_exec_channel *t,
+             const union spu_exec_channel *p,
+             float lodbias,  /* XXX should be float[4] */
+             union spu_exec_channel *r,
+             union spu_exec_channel *g,
+             union spu_exec_channel *b,
+             union spu_exec_channel *a )
+{
+   qword rgba[4];
+   qword out[4];
+
+   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
+                        (float (*)[4]) rgba);
+
+   _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
+   r->q = out[0];
+   g->q = out[1];
+   b->q = out[2];
+   a->q = out[3];
+}
+
+
+static void
+exec_tex(struct spu_exec_machine *mach,
+         const struct tgsi_full_instruction *inst,
+         boolean biasLod, boolean projected)
+{
+   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   union spu_exec_channel r[8];
+   uint chan_index;
+   float lodBias;
+
+   /*   printf("Sampler %u unit %u\n", sampler, unit); */
+
+   switch (inst->InstructionExtTexture.Texture) {
+   case TGSI_TEXTURE_1D:
+
+      FETCH(&r[0], 0, CHAN_X);
+
+      if (projected) {
+         FETCH(&r[1], 0, CHAN_W);
+         r[0].q = micro_div(r[0].q, r[1].q);
+      }
+
+      if (biasLod) {
+         FETCH(&r[1], 0, CHAN_W);
+         lodBias = r[1].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
+                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+      break;
+
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      if (projected) {
+         FETCH(&r[3], 0, CHAN_W);
+         r[0].q = micro_div(r[0].q, r[3].q);
+         r[1].q = micro_div(r[1].q, r[3].q);
+         r[2].q = micro_div(r[2].q, r[3].q);
+      }
+
+      if (biasLod) {
+         FETCH(&r[3], 0, CHAN_W);
+         lodBias = r[3].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], lodBias,  /* inputs */
+                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
+      break;
+
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      if (projected) {
+         FETCH(&r[3], 0, CHAN_W);
+         r[0].q = micro_div(r[0].q, r[3].q);
+         r[1].q = micro_div(r[1].q, r[3].q);
+         r[2].q = micro_div(r[2].q, r[3].q);
+      }
+
+      if (biasLod) {
+         FETCH(&r[3], 0, CHAN_W);
+         lodBias = r[3].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], lodBias,
+                  &r[0], &r[1], &r[2], &r[3]);
+      break;
+
+   default:
+      ASSERT (0);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+      STORE( &r[chan_index], 0, chan_index );
+   }
+}
+
+
+
+static void
+constant_interpolation(
+   struct spu_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   unsigned i;
+
+   for( i = 0; i < QUAD_SIZE; i++ ) {
+      mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
+   }
+}
+
+static void
+linear_interpolation(
+   struct spu_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const
float y = mach->QuadPos.xyzw[1].f[0]; +   const float dadx = mach->InterpCoefs[attrib].dadx[chan]; +   const float dady = mach->InterpCoefs[attrib].dady[chan]; +   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; +   mach->Inputs[attrib].xyzw[chan].f[0] = a0; +   mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; +   mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; +   mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +static void +perspective_interpolation( +   struct spu_exec_machine *mach, +   unsigned attrib, +   unsigned chan ) +{ +   const float x = mach->QuadPos.xyzw[0].f[0]; +   const float y = mach->QuadPos.xyzw[1].f[0]; +   const float dadx = mach->InterpCoefs[attrib].dadx[chan]; +   const float dady = mach->InterpCoefs[attrib].dady[chan]; +   const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; +   const float *w = mach->QuadPos.xyzw[3].f; +   /* divide by W here */ +   mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; +   mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; +   mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; +   mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* interpolation_func)( +   struct spu_exec_machine *mach, +   unsigned attrib, +   unsigned chan ); + +static void +exec_declaration(struct spu_exec_machine *mach, +                 const struct tgsi_full_declaration *decl) +{ +   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { +      if( decl->Declaration.File == TGSI_FILE_INPUT ) { +         unsigned first, last, mask; +         interpolation_func interp; + +         first = decl->DeclarationRange.First; +         last = decl->DeclarationRange.Last; +         mask = decl->Declaration.UsageMask; + +         switch( decl->Declaration.Interpolate ) { +         case TGSI_INTERPOLATE_CONSTANT: +            interp = constant_interpolation; +            break; + +         case TGSI_INTERPOLATE_LINEAR: +            interp = linear_interpolation; +            break; + +         case TGSI_INTERPOLATE_PERSPECTIVE: +            interp = perspective_interpolation; +            break; + +         default: +            ASSERT( 0 ); +         } + +         if( mask == TGSI_WRITEMASK_XYZW ) { +            unsigned i, j; + +            for( i = first; i <= last; i++ ) { +               for( j = 0; j < NUM_CHANNELS; j++ ) { +                  interp( mach, i, j ); +               } +            } +         } +         else { +            unsigned i, j; + +            for( j = 0; j < NUM_CHANNELS; j++ ) { +               if( mask & (1 << j) ) { +                  for( i = first; i <= last; i++ ) { +                     interp( mach, i, j ); +                  } +               } +            } +         } +      } +   } +} + +static void +exec_instruction( +   struct spu_exec_machine *mach, +   const struct tgsi_full_instruction *inst, +   int *pc ) +{ +   uint chan_index; +   union spu_exec_channel r[8]; + +   (*pc)++; + +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_ARL: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +	 FETCH( &r[0], 0, chan_index ); +         r[0].q = si_cflts(r[0].q, 0); +	 STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_MOV: +   case TGSI_OPCODE_SWZ: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_LIT: +      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 
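+         /* LIT: dst.x = 1,  dst.y = max(src.x, 0),
+          *      dst.z = (src.x > 0) ? max(src.y, 0)^clamp(src.w, -128, 128) : 0,
+          *      dst.w = 1  -- computed piecewise per enabled write-mask
+          *      channel below.
+          */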
+	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); +      } + +      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { +	 FETCH( &r[0], 0, CHAN_X ); +         if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { +            r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); +	    STORE( &r[0], 0, CHAN_Y ); +	 } + +         if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { +            FETCH( &r[1], 0, CHAN_Y ); +            r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + +            FETCH( &r[2], 0, CHAN_W ); +            r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); +            r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); +            r[1].q = micro_pow(r[1].q, r[2].q); + +            /* r0 = (r0 > 0.0) ? r1 : 0.0 +             */ +            r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); +            r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, +                             r[0].q); +            STORE( &r[0], 0, CHAN_Z ); +         } +      } + +      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { +	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); +      } +      break; + +   case TGSI_OPCODE_RCP: +   /* TGSI_OPCODE_RECIP */ +      FETCH( &r[0], 0, CHAN_X ); +      r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +	 STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_RSQ: +   /* TGSI_OPCODE_RECIPSQRT */ +      FETCH( &r[0], 0, CHAN_X ); +      r[0].q = micro_sqrt(r[0].q); +      r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +	 STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_EXP: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_LOG: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_MUL: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) +      { +         FETCH(&r[0], 0, chan_index); +         FETCH(&r[1], 1, chan_index); + +         r[0].q = si_fm(r[0].q, r[1].q); + +         STORE(&r[0], 0, chan_index); +      } +      break; + +   case TGSI_OPCODE_ADD: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         r[0].q = si_fa(r[0].q, r[1].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_DP3: +   /* TGSI_OPCODE_DOT3 */ +      FETCH( &r[0], 0, CHAN_X ); +      FETCH( &r[1], 1, CHAN_X ); +      r[0].q = si_fm(r[0].q, r[1].q); + +      FETCH( &r[1], 0, CHAN_Y ); +      FETCH( &r[2], 1, CHAN_Y ); +      r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + +      FETCH( &r[1], 0, CHAN_Z ); +      FETCH( &r[2], 1, CHAN_Z ); +      r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         STORE( &r[0], 0, chan_index ); +      } +      break; + +    case TGSI_OPCODE_DP4: +    /* TGSI_OPCODE_DOT4 */ +       FETCH(&r[0], 0, CHAN_X); +       FETCH(&r[1], 1, CHAN_X); + +      r[0].q = si_fm(r[0].q, r[1].q); + +       FETCH(&r[1], 0, CHAN_Y); +       FETCH(&r[2], 1, CHAN_Y); + +      r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + +       FETCH(&r[1], 0, CHAN_Z); +       FETCH(&r[2], 1, CHAN_Z); + +      r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + +       FETCH(&r[1], 0, CHAN_W); +       FETCH(&r[2], 1, CHAN_W); + +      r[0].q = si_fma(r[1].q, r[2].q, 
r[0].q); + +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +	 STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_DST: +      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { +	 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); +      } + +      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { +	 FETCH( &r[0], 0, CHAN_Y ); +	 FETCH( &r[1], 1, CHAN_Y); +      r[0].q = si_fm(r[0].q, r[1].q); +	 STORE( &r[0], 0, CHAN_Y ); +      } + +      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { +	 FETCH( &r[0], 0, CHAN_Z ); +	 STORE( &r[0], 0, CHAN_Z ); +      } + +      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { +	 FETCH( &r[0], 1, CHAN_W ); +	 STORE( &r[0], 0, CHAN_W ); +      } +      break; + +   case TGSI_OPCODE_MIN: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH(&r[0], 0, chan_index); +         FETCH(&r[1], 1, chan_index); + +         r[0].q = micro_min(r[0].q, r[1].q); + +         STORE(&r[0], 0, chan_index); +      } +      break; + +   case TGSI_OPCODE_MAX: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH(&r[0], 0, chan_index); +         FETCH(&r[1], 1, chan_index); + +         r[0].q = micro_max(r[0].q, r[1].q); + +         STORE(&r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SLT: +   /* TGSI_OPCODE_SETLT */ +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); + +         r[0].q = micro_ge(r[0].q, r[1].q); +         r[0].q = si_xori(r[0].q, 0xff); + +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SGE: +   /* TGSI_OPCODE_SETGE */ +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         r[0].q = micro_ge(r[0].q, r[1].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_MAD: +   /* TGSI_OPCODE_MADD */ +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         FETCH( &r[2], 2, chan_index ); +         r[0].q = si_fma(r[0].q, r[1].q, r[2].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SUB: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH(&r[0], 0, chan_index); +         FETCH(&r[1], 1, chan_index); + +         r[0].q = si_fs(r[0].q, r[1].q); + +         STORE(&r[0], 0, chan_index); +      } +      break; + +   case TGSI_OPCODE_LERP: +   /* TGSI_OPCODE_LRP */ +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH(&r[0], 0, chan_index); +         FETCH(&r[1], 1, chan_index); +         FETCH(&r[2], 2, chan_index); + +         r[1].q = si_fs(r[1].q, r[2].q); +         r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + +         STORE(&r[0], 0, chan_index); +      } +      break; + +   case TGSI_OPCODE_CND: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_CND0: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_DOT2ADD: +      /* TGSI_OPCODE_DP2A */ +      ASSERT (0); +      break; + +   case TGSI_OPCODE_INDEX: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_NEGATE: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_FRAC: +   /* TGSI_OPCODE_FRC */ +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         r[0].q = micro_frc(r[0].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_CLAMP: +      
ASSERT (0);
+      break;
+
+   case TGSI_OPCODE_FLOOR:
+   /* TGSI_OPCODE_FLR */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_flr(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_rnd(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXPBASE2:
+    /* TGSI_OPCODE_EX2 */
+      FETCH(&r[0], 0, CHAN_X);
+
+      r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LOGBASE2:
+   /* TGSI_OPCODE_LG2 */
+      FETCH( &r[0], 0, CHAN_X );
+      r[0].q = micro_lg2(r[0].q);
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POWER:
+      /* TGSI_OPCODE_POW */
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      r[0].q = micro_pow(r[0].q, r[1].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CROSSPRODUCT:
+      /* TGSI_OPCODE_XPD */
+      FETCH(&r[0], 0, CHAN_Y);
+      FETCH(&r[1], 1, CHAN_Z);
+      FETCH(&r[3], 0, CHAN_Z);
+      FETCH(&r[4], 1, CHAN_Y);
+
+      /* r2 = (r0 * r1) - (r3 * r4)
+       */
+      r[2].q = si_fm(r[3].q, r[4].q);
+      r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         STORE( &r[2], 0, CHAN_X );
+      }
+
+      FETCH(&r[2], 1, CHAN_X);
+      FETCH(&r[5], 0, CHAN_X);
+
+      /* r3 = (r3 * r2) - (r1 * r5)
+       */
+      r[1].q = si_fm(r[1].q, r[5].q);
+      r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         STORE( &r[3], 0, CHAN_Y );
+      }
+
+      /* r5 = (r5 * r4) - (r0 * r2)
+       */
+      r[0].q = si_fm(r[0].q, r[2].q);
+      r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         STORE( &r[5], 0, CHAN_Z );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+    case TGSI_OPCODE_MULTIPLYMATRIX:
+       ASSERT (0);
+       break;
+
+    case TGSI_OPCODE_ABS:
+       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+          FETCH(&r[0], 0, chan_index);
+
+          r[0].q = micro_abs(r[0].q);
+
+          STORE(&r[0], 0, chan_index);
+       }
+       break;
+
+   case TGSI_OPCODE_RCC:
+      ASSERT (0);
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      r[0].q = si_fm(r[0].q, r[1].q);
+
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 1, CHAN_Y);
+
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FETCH(&r[1], 0, CHAN_Z);
+      FETCH(&r[2], 1, CHAN_Z);
+
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FETCH(&r[1], 1, CHAN_W);
+
+      r[0].q = si_fa(r[0].q, r[1].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH(&r[0], 0, CHAN_X);
+
+      r[0].q = micro_cos(r[0].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+     
 break; + +   case TGSI_OPCODE_DDX: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         r[0].q = micro_ddx(r[0].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_DDY: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         r[0].q = micro_ddy(r[0].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_KILP: +      exec_kilp (mach, inst); +      break; + +   case TGSI_OPCODE_KIL: +      exec_kil (mach, inst); +      break; + +   case TGSI_OPCODE_PK2H: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_PK2US: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_PK4B: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_PK4UB: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_RFL: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_SEQ: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); + +         r[0].q = si_fceq(r[0].q, r[1].q); + +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SFL: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_SGT: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         r[0].q = si_fcgt(r[0].q, r[1].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SIN: +      FETCH( &r[0], 0, CHAN_X ); +      r[0].q = micro_sin(r[0].q); +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SLE: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); + +         r[0].q = si_fcgt(r[0].q, r[1].q); +         r[0].q = si_xori(r[0].q, 0xff); + +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SNE: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); + +         r[0].q = si_fceq(r[0].q, r[1].q); +         r[0].q = si_xori(r[0].q, 0xff); + +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_STR: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_TEX: +      /* simple texture lookup */ +      /* src[0] = texcoord */ +      /* src[1] = sampler unit */ +      exec_tex(mach, inst, FALSE, FALSE); +      break; + +   case TGSI_OPCODE_TXB: +      /* Texture lookup with lod bias */ +      /* src[0] = texcoord (src[0].w = load bias) */ +      /* src[1] = sampler unit */ +      exec_tex(mach, inst, TRUE, FALSE); +      break; + +   case TGSI_OPCODE_TXD: +      /* Texture lookup with explict partial derivatives */ +      /* src[0] = texcoord */ +      /* src[1] = d[strq]/dx */ +      /* src[2] = d[strq]/dy */ +      /* src[3] = sampler unit */ +      ASSERT (0); +      break; + +   case TGSI_OPCODE_TXL: +      /* Texture lookup with explit LOD */ +      /* src[0] = texcoord (src[0].w = load bias) */ +      /* src[1] = sampler unit */ +      exec_tex(mach, inst, TRUE, FALSE); +      break; + +   case TGSI_OPCODE_TXP: +      /* Texture lookup with projection */ +      /* src[0] = texcoord (src[0].w = projection) */ +      /* src[1] = sampler unit */ +      exec_tex(mach, inst, TRUE, TRUE); +      break; + +   case 
TGSI_OPCODE_UP2H: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_UP2US: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_UP4B: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_UP4UB: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_X2D: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_ARA: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_ARR: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_BRA: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_CAL: +      /* skip the call if no execution channels are enabled */ +      if (mach->ExecMask) { +         /* do the call */ + +         /* push the Cond, Loop, Cont stacks */ +         ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); +         mach->CondStack[mach->CondStackTop++] = mach->CondMask; +         ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); +         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; +         ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); +         mach->ContStack[mach->ContStackTop++] = mach->ContMask; + +         ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); +         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + +         /* note that PC was already incremented above */ +         mach->CallStack[mach->CallStackTop++] = *pc; +         *pc = inst->InstructionExtLabel.Label; +      } +      break; + +   case TGSI_OPCODE_RET: +      mach->FuncMask &= ~mach->ExecMask; +      UPDATE_EXEC_MASK(mach); + +      if (mach->ExecMask == 0x0) { +         /* really return now (otherwise, keep executing */ + +         if (mach->CallStackTop == 0) { +            /* returning from main() */ +            *pc = -1; +            return; +         } +         *pc = mach->CallStack[--mach->CallStackTop]; + +         /* pop the Cond, Loop, Cont stacks */ +         ASSERT(mach->CondStackTop > 0); +         mach->CondMask = mach->CondStack[--mach->CondStackTop]; +         ASSERT(mach->LoopStackTop > 0); +         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; +         ASSERT(mach->ContStackTop > 0); +         mach->ContMask = mach->ContStack[--mach->ContStackTop]; +         ASSERT(mach->FuncStackTop > 0); +         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + +         UPDATE_EXEC_MASK(mach); +      } +      break; + +   case TGSI_OPCODE_SSG: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_CMP: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH(&r[0], 0, chan_index); +         FETCH(&r[1], 1, chan_index); +         FETCH(&r[2], 2, chan_index); + +         /* r0 = (r0 < 0.0) ? 
r1 : r2 +          */ +         r[3].q = si_xor(r[3].q, r[3].q); +         r[0].q = micro_lt(r[0].q, r[3].q); +         r[0].q = si_selb(r[1].q, r[2].q, r[0].q); + +         STORE(&r[0], 0, chan_index); +      } +      break; + +   case TGSI_OPCODE_SCS: +      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { +         FETCH( &r[0], 0, CHAN_X ); +      } +      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { +         r[1].q = micro_cos(r[0].q); +         STORE( &r[1], 0, CHAN_X ); +      } +      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { +         r[1].q = micro_sin(r[0].q); +         STORE( &r[1], 0, CHAN_Y ); +      } +      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { +         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); +      } +      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { +         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); +      } +      break; + +   case TGSI_OPCODE_NRM: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_DIV: +      ASSERT( 0 ); +      break; + +   case TGSI_OPCODE_DP2: +      FETCH( &r[0], 0, CHAN_X ); +      FETCH( &r[1], 1, CHAN_X ); +      r[0].q = si_fm(r[0].q, r[1].q); + +      FETCH( &r[1], 0, CHAN_Y ); +      FETCH( &r[2], 1, CHAN_Y ); +      r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_IF: +      /* push CondMask */ +      ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); +      mach->CondStack[mach->CondStackTop++] = mach->CondMask; +      FETCH( &r[0], 0, CHAN_X ); +      /* update CondMask */ +      if( ! r[0].u[0] ) { +         mach->CondMask &= ~0x1; +      } +      if( ! r[0].u[1] ) { +         mach->CondMask &= ~0x2; +      } +      if( ! r[0].u[2] ) { +         mach->CondMask &= ~0x4; +      } +      if( ! 
r[0].u[3] ) { +         mach->CondMask &= ~0x8; +      } +      UPDATE_EXEC_MASK(mach); +      /* Todo: If CondMask==0, jump to ELSE */ +      break; + +   case TGSI_OPCODE_ELSE: +      /* invert CondMask wrt previous mask */ +      { +         uint prevMask; +         ASSERT(mach->CondStackTop > 0); +         prevMask = mach->CondStack[mach->CondStackTop - 1]; +         mach->CondMask = ~mach->CondMask & prevMask; +         UPDATE_EXEC_MASK(mach); +         /* Todo: If CondMask==0, jump to ENDIF */ +      } +      break; + +   case TGSI_OPCODE_ENDIF: +      /* pop CondMask */ +      ASSERT(mach->CondStackTop > 0); +      mach->CondMask = mach->CondStack[--mach->CondStackTop]; +      UPDATE_EXEC_MASK(mach); +      break; + +   case TGSI_OPCODE_END: +      /* halt execution */ +      *pc = -1; +      break; + +   case TGSI_OPCODE_REP: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_ENDREP: +       ASSERT (0); +       break; + +   case TGSI_OPCODE_PUSHA: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_POPA: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_CEIL: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         r[0].q = micro_ceil(r[0].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_I2F: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         r[0].q = si_csflt(r[0].q, 0); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_NOT: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         r[0].q = si_xorbi(r[0].q, 0xff); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_TRUNC: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         r[0].q = micro_trunc(r[0].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SHL: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); + +         r[0].q = si_shl(r[0].q, r[1].q); + +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SHR: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         r[0].q = micro_ishr(r[0].q, r[1].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_AND: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         r[0].q = si_and(r[0].q, r[1].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_OR: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         r[0].q = si_or(r[0].q, r[1].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_MOD: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_XOR: +      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { +         FETCH( &r[0], 0, chan_index ); +         FETCH( &r[1], 1, chan_index ); +         r[0].q = si_xor(r[0].q, r[1].q); +         STORE( &r[0], 0, chan_index ); +      } +      break; + +   case TGSI_OPCODE_SAD: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_TXF: +      ASSERT (0); +      break; + +   case 
TGSI_OPCODE_TXQ: +      ASSERT (0); +      break; + +   case TGSI_OPCODE_EMIT: +      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; +      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; +      break; + +   case TGSI_OPCODE_ENDPRIM: +      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; +      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; +      break; + +   case TGSI_OPCODE_LOOP: +      /* fall-through (for now) */ +   case TGSI_OPCODE_BGNLOOP2: +      /* push LoopMask and ContMasks */ +      ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); +      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; +      ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); +      mach->ContStack[mach->ContStackTop++] = mach->ContMask; +      break; + +   case TGSI_OPCODE_ENDLOOP: +      /* fall-through (for now at least) */ +   case TGSI_OPCODE_ENDLOOP2: +      /* Restore ContMask, but don't pop */ +      ASSERT(mach->ContStackTop > 0); +      mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; +      if (mach->LoopMask) { +         /* repeat loop: jump to instruction just past BGNLOOP */ +         *pc = inst->InstructionExtLabel.Label + 1; +      } +      else { +         /* exit loop: pop LoopMask */ +         ASSERT(mach->LoopStackTop > 0); +         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; +         /* pop ContMask */ +         ASSERT(mach->ContStackTop > 0); +         mach->ContMask = mach->ContStack[--mach->ContStackTop]; +      } +      UPDATE_EXEC_MASK(mach); +      break; + +   case TGSI_OPCODE_BRK: +      /* turn off loop channels for each enabled exec channel */ +      mach->LoopMask &= ~mach->ExecMask; +      /* Todo: if mach->LoopMask == 0, jump to end of loop */ +      UPDATE_EXEC_MASK(mach); +      break; + +   case TGSI_OPCODE_CONT: +      /* turn off cont channels for each enabled exec channel */ +      mach->ContMask &= ~mach->ExecMask; +      /* Todo: if mach->LoopMask == 0, jump to end of loop */ +      UPDATE_EXEC_MASK(mach); +      break; + +   case TGSI_OPCODE_BGNSUB: +      /* no-op */ +      break; + +   case TGSI_OPCODE_ENDSUB: +      /* no-op */ +      break; + +   case TGSI_OPCODE_NOISE1: +      ASSERT( 0 ); +      break; + +   case TGSI_OPCODE_NOISE2: +      ASSERT( 0 ); +      break; + +   case TGSI_OPCODE_NOISE3: +      ASSERT( 0 ); +      break; + +   case TGSI_OPCODE_NOISE4: +      ASSERT( 0 ); +      break; + +   case TGSI_OPCODE_NOP: +      break; + +   default: +      ASSERT( 0 ); +   } +} + + +/** + * Run TGSI interpreter. 
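+ *
+ * Declarations and instructions are DMA'd in from main memory through the
+ * software data cache and executed until an END (or a final RET from main)
+ * sets the program counter to -1.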
+ * \return bitmask of "alive" quad components + */ +uint +spu_exec_machine_run( struct spu_exec_machine *mach ) +{ +   uint i; +   int pc = 0; + +   mach->CondMask = 0xf; +   mach->LoopMask = 0xf; +   mach->ContMask = 0xf; +   mach->FuncMask = 0xf; +   mach->ExecMask = 0xf; + +   mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ +   ASSERT(mach->CondStackTop == 0); +   ASSERT(mach->LoopStackTop == 0); +   ASSERT(mach->ContStackTop == 0); +   ASSERT(mach->CallStackTop == 0); + +   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; +   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + +   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { +      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; +      mach->Primitives[0] = 0; +   } + + +   /* execute declarations (interpolants) */ +   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { +      for (i = 0; i < mach->NumDeclarations; i++) { +         union { +            struct tgsi_full_declaration decl; +            qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; +         } d ALIGN16_ATTRIB; +         unsigned ea = (unsigned) (mach->Declarations + i); + +         spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); + +         exec_declaration( mach, &d.decl ); +      } +   } + +   /* execute instructions, until pc is set to -1 */ +   while (pc != -1) { +      union { +         struct tgsi_full_instruction inst; +         qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; +      } i ALIGN16_ATTRIB; +      unsigned ea = (unsigned) (mach->Instructions + pc); + +      spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); +      exec_instruction( mach, & i.inst, &pc ); +   } + +#if 0 +   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ +   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { +      /* +       * Scale back depth component. +       */ +      for (i = 0; i < 4; i++) +         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; +   } +#endif + +   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h new file mode 100644 index 0000000000..8605679940 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -0,0 +1,172 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#if !defined SPU_EXEC_H +#define SPU_EXEC_H + +#include "pipe/p_compiler.h" +#include "tgsi/tgsi_exec.h" + +#if defined __cplusplus +extern "C" { +#endif + +/** +  * Registers may be treated as float, signed int or unsigned int. +  */ +union spu_exec_channel +{ +   float    f[QUAD_SIZE]; +   int      i[QUAD_SIZE]; +   unsigned u[QUAD_SIZE]; +   qword    q; +}; + +/** +  * A vector[RGBA] of channels[4 pixels] +  */ +struct spu_exec_vector +{ +   union spu_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct spu_interp_coef +{ +   float a0[NUM_CHANNELS];	/* in an xyzw layout */ +   float dadx[NUM_CHANNELS]; +   float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache;  /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct spu_sampler +{ +   const struct pipe_sampler_state *state; +   struct pipe_texture *texture; +   /** Get samples for four fragments in a quad */ +   void (*get_samples)(struct spu_sampler *sampler, +                       const float s[QUAD_SIZE], +                       const float t[QUAD_SIZE], +                       const float p[QUAD_SIZE], +                       float lodbias, +                       float rgba[NUM_CHANNELS][QUAD_SIZE]); +   void *pipe; /*XXX temporary*/ +   struct softpipe_tile_cache *cache; +}; + + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct spu_exec_machine +{ +   /* +    * 32 program temporaries +    * 4  internal temporaries +    * 1  address +    */ +   struct spu_exec_vector       Temps[TGSI_EXEC_NUM_TEMPS  +                                      + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] +       ALIGN16_ATTRIB; + +   struct spu_exec_vector       *Addrs; + +   struct spu_sampler           *Samplers; + +   float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; +   unsigned                      ImmLimit; +   float                         (*Consts)[4]; +   struct spu_exec_vector       *Inputs; +   struct spu_exec_vector       *Outputs; +   unsigned                      Processor; + +   /* GEOMETRY processor only. */ +   unsigned                      *Primitives; + +   /* FRAGMENT processor only. 
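+    * InterpCoefs gives, per input channel, the plane-equation coefficients
+    * (a0, dadx, dady) used to evaluate fragment inputs at the quad's
+    * position (QuadPos).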
*/ +   const struct spu_interp_coef *InterpCoefs; +   struct spu_exec_vector       QuadPos; + +   /* Conditional execution masks */ +   uint CondMask;  /**< For IF/ELSE/ENDIF */ +   uint LoopMask;  /**< For BGNLOOP/ENDLOOP */ +   uint ContMask;  /**< For loop CONT statements */ +   uint FuncMask;  /**< For function calls */ +   uint ExecMask;  /**< = CondMask & LoopMask */ + +   /** Condition mask stack (for nested conditionals) */ +   uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; +   int CondStackTop; + +   /** Loop mask stack (for nested loops) */ +   uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; +   int LoopStackTop; + +   /** Loop continue mask stack (see comments in tgsi_exec.c) */ +   uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; +   int ContStackTop; + +   /** Function execution mask stack (for executing subroutine code) */ +   uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; +   int FuncStackTop; + +   /** Function call stack for saving/restoring the program counter */ +   uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; +   int CallStackTop; + +   struct tgsi_full_instruction *Instructions; +   uint NumInstructions; + +   struct tgsi_full_declaration *Declarations; +   uint NumDeclarations; +}; + + +extern void +spu_exec_machine_init(struct spu_exec_machine *mach, +                      uint numSamplers, +                      struct spu_sampler *samplers, +                      unsigned processor); + +extern uint +spu_exec_machine_run( struct spu_exec_machine *mach ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* SPU_EXEC_H */ diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c new file mode 100644 index 0000000000..ff3d609d25 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -0,0 +1,173 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +/** + * SPU functions accessed by shaders. 
+ * + * Authors: Brian Paul + */ + + +#include <string.h> +#include <libmisc.h> +#include <math.h> +#include <cos14_v.h> +#include <sin14_v.h> +#include <simdmath/exp2f4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_funcs.h" +#include "spu_texture.h" + + +/** For "return"-ing four vectors */ +struct vec_4x4 +{ +   vector float v[4]; +}; + + +static vector float +spu_cos(vector float x) +{ +   return _cos14_v(x); +} + +static vector float +spu_sin(vector float x) +{ +   return _sin14_v(x); +} + +static vector float +spu_pow(vector float x, vector float y) +{ +   return _powf4(x, y); +} + +static vector float +spu_exp2(vector float x) +{ +   return _exp2f4(x); +} + +static vector float +spu_log2(vector float x) +{ +   return _log2f4(x); +} + + +static struct vec_4x4 +spu_tex_2d(vector float s, vector float t, vector float r, vector float q, +           unsigned unit) +{ +   struct vec_4x4 colors; +   (void) r; +   (void) q; +   spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); +   return colors; +} + +static struct vec_4x4 +spu_tex_3d(vector float s, vector float t, vector float r, vector float q, +           unsigned unit) +{ +   struct vec_4x4 colors; +   (void) r; +   (void) q; +   spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); +   return colors; +} + +static struct vec_4x4 +spu_tex_cube(vector float s, vector float t, vector float r, vector float q, +           unsigned unit) +{ +   struct vec_4x4 colors; +   (void) q; +   sample_texture_cube(s, t, r, unit, colors.v); +   return colors; +} + + +/** + * Add named function to list of "exported" functions that will be + * made available to the PPU-hosted code generator. + */ +static void +export_func(struct cell_spu_function_info *spu_functions, +            const char *name, void *addr) +{ +   uint n = spu_functions->num; +   ASSERT(strlen(name) < 16); +   strcpy(spu_functions->names[n], name); +   spu_functions->addrs[n] = (uint) addr; +   spu_functions->num++; +   ASSERT(spu_functions->num <= 16); +} + + +/** + * Return info about the SPU's function to the PPU / main memory. + * The PPU needs to know the address of some SPU-side functions so + * that we can generate shader code with function calls. 
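+ *
+ * Illustrative sketch (an assumption, not part of this change) of how the
+ * PPU-side code generator might resolve one of these names from the
+ * cell_spu_function_info table that export_func() fills in below; the
+ * returned value is the function's address in SPU code space:
+ *
+ *    static uint
+ *    lookup_spu_func(const struct cell_spu_function_info *f, const char *name)
+ *    {
+ *       uint i;
+ *       for (i = 0; i < f->num; i++)
+ *          if (strcmp(f->names[i], name) == 0)
+ *             return f->addrs[i];
+ *       return 0;
+ *    }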
+ */ +void +return_function_info(void) +{ +   struct cell_spu_function_info funcs ALIGN16_ATTRIB; +   int tag = TAG_MISC; + +   ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ + +   funcs.num = 0; +   export_func(&funcs, "spu_cos", &spu_cos); +   export_func(&funcs, "spu_sin", &spu_sin); +   export_func(&funcs, "spu_pow", &spu_pow); +   export_func(&funcs, "spu_exp2", &spu_exp2); +   export_func(&funcs, "spu_log2", &spu_log2); +   export_func(&funcs, "spu_tex_2d", &spu_tex_2d); +   export_func(&funcs, "spu_tex_3d", &spu_tex_3d); +   export_func(&funcs, "spu_tex_cube", &spu_tex_cube); + +   /* Send the function info back to the PPU / main memory */ +   mfc_put((void *) &funcs,  /* src in local store */ +           (unsigned int) spu.init.spu_functions, /* dst in main memory */ +           sizeof(funcs),  /* bytes */ +           tag, +           0, /* tid */ +           0  /* rid */); +   wait_on_mask(1 << tag); +} + + + diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h new file mode 100644 index 0000000000..3adb6ae99f --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.h @@ -0,0 +1,35 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SPU_FUNCS_H +#define SPU_FUNCS_H + +extern void +return_function_info(void); + +#endif + diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c new file mode 100644 index 0000000000..97c86d194d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -0,0 +1,117 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +/* main() for Cell SPU code */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_funcs.h" +#include "spu_command.h" +#include "spu_main.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +//#include "spu_test.h" +#include "cell/common.h" + + +/* +helpful headers: +/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h +/opt/cell/sdk/usr/include/libmisc.h +*/ + +struct spu_global spu; + + +static void +one_time_init(void) +{ +   memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); +   memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); +   invalidate_tex_cache(); +} + +/* In some versions of the SDK the SPE main takes 'unsigned long' as a + * parameter.  In others it takes 'unsigned long long'.  Use a define to + * select between the two. + */ +#ifdef SPU_MAIN_PARAM_LONG_LONG +typedef unsigned long long main_param_t; +#else +typedef unsigned long main_param_t; +#endif + +/** + * SPE entrypoint. 
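+ *
+ * \param speid  SPE id assigned by the runtime (only used in debug output)
+ * \param argp   main-memory address of the cell_init_info block that is
+ *               DMA'd into spu.init below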
+ */ +int +main(main_param_t speid, main_param_t argp) +{ +   int tag = 0; + +   (void) speid; + +   ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); +   ASSERT(sizeof(struct cell_command_render) % 8 == 0); +   ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); +   ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); + +   one_time_init(); +   spu_command_init(); + +   D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); +   D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); + +   /* get initialization data */ +   mfc_get(&spu.init,  /* dest */ +           (unsigned int) argp, /* src */ +           sizeof(struct cell_init_info), /* bytes */ +           tag, +           0, /* tid */ +           0  /* rid */); +   wait_on_mask( 1 << tag ); + +   if (spu.init.id == 0) { +      return_function_info(); +   } + +#if 0 +   if (spu.init.id==0) +      spu_test_misc(spu.init.id); +#endif + +   command_loop(); + +   spu_command_close(); + +   return 0; +} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h new file mode 100644 index 0000000000..33767e7c51 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -0,0 +1,254 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SPU_MAIN_H +#define SPU_MAIN_H + + +#include <spu_mfcio.h> + +#include "cell/common.h" +#include "draw/draw_vertex.h" +#include "pipe/p_state.h" + + +#if DEBUG +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define D_PRINTF(flag, format,...) \ +   if (spu.init.debug_flags & (flag)) \ +      printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#else +#define D_PRINTF(...) +#endif + + +/** + * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. + * The data may be addressed through several different types. 
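+ * The members overlay the same storage, so scalar and SIMD code can share a
+ * tile.  Illustrative only, for some tile_t t:
+ *    t.ui[y][x] = 0xff00ff00;                  one 4-byte pixel
+ *    t.ui4[i][j] = spu_splats(0xff00ff00u);    four pixels with one SIMD store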
+ */ +typedef union { +   ushort us[TILE_SIZE][TILE_SIZE]; +   uint   ui[TILE_SIZE][TILE_SIZE]; +   vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; +   vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR   1 +#define TILE_STATUS_DEFINED 2  /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN   3  /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY   4  /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5  /**< mfc_get() called but not yet arrived */ + + +/** Function for sampling textures */ +typedef void (*spu_sample_texture_2d_func)(vector float s, +                                           vector float t, +                                           uint unit, uint level, uint face, +                                           vector float colors[4]); + + +/** Function for performing per-fragment ops */ +typedef void (*spu_fragment_ops_func)(uint x, uint y, +                                      tile_t *colorTile, +                                      tile_t *depthStencilTile, +                                      vector float fragZ, +                                      vector float fragRed, +                                      vector float fragGreen, +                                      vector float fragBlue, +                                      vector float fragAlpha, +                                      vector unsigned int mask); + +/** Function for running fragment program */ +typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, +                                                         vector float *outputs, +                                                         vector float *constants); + + +struct spu_framebuffer +{ +   void *color_start;              /**< addr of color surface in main memory */ +   void *depth_start;              /**< addr of depth surface in main memory */ +   enum pipe_format color_format; +   enum pipe_format depth_format; +   uint width, height;             /**< size in pixels */ +   uint width_tiles, height_tiles; /**< width and height in tiles */ + +   uint color_clear_value; +   uint depth_clear_value; + +   uint zsize;                     /**< 0, 2 or 4 bytes per Z */ +   float zscale;                   /**< 65535.0, 2^24-1 or 2^32-1 */ +} ALIGN16_ATTRIB; + + +/** per-texture level info */ +struct spu_texture_level +{ +   void *start; +   ushort width, height, depth; +   ushort tiles_per_row; +   uint bytes_per_image; +   /** texcoord scale factors */ +   vector float scale_s, scale_t, scale_r; +   /** texcoord masks (if REPEAT then size-1, else ~0) */ +   vector signed int mask_s, mask_t, mask_r; +   /** texcoord clamp limits */ +   vector signed int max_s, max_t, max_r; +} ALIGN16_ATTRIB; + + +struct spu_texture +{ +   struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; +   uint max_level; +   uint target;  /**< PIPE_TEXTURE_x */ +} ALIGN16_ATTRIB; + + +/** + * All SPU global/context state will be in a singleton object of this type: + */ +struct spu_global +{ +   /** One-time init/constant info */ +   struct cell_init_info init; + +   /* +    * Current state +    */ +   struct spu_framebuffer fb; +   struct pipe_depth_stencil_alpha_state depth_stencil_alpha; +   struct pipe_blend_state blend; +   struct pipe_blend_color blend_color; +   struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; +   struct pipe_rasterizer_state rasterizer; +   struct spu_texture texture[PIPE_MAX_SAMPLERS]; +   struct 
vertex_info vertex_info; + +   /** Current color and Z tiles */ +   tile_t ctile ALIGN16_ATTRIB; +   tile_t ztile ALIGN16_ATTRIB; + +   /** Read depth/stencil tiles? */ +   boolean read_depth_stencil; + +   /** Current tiles' status */ +   ubyte cur_ctile_status, cur_ztile_status; + +   /** Status of all tiles in framebuffer */ +   ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; +   ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + +   /** Current fragment ops machine code, at 8-byte boundary */ +   uint *fragment_ops_code; +   uint fragment_ops_code_size; +   /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ +   spu_fragment_ops_func fragment_ops[2]; + +   /** Current fragment program machine code, at 8-byte boundary */ +   uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; +   /** Current fragment program function */ +   spu_fragment_program_func fragment_program; + +   /** Current texture sampler function */ +   spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; +   spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; +   spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; + +   /** Fragment program constants */ +   vector float constants[4 * CELL_MAX_CONSTANTS]; + +} ALIGN16_ATTRIB; + + +extern struct spu_global spu; + + + +/* DMA TAGS */ + +#define TAG_SURFACE_CLEAR     10 +#define TAG_VERTEX_BUFFER     11 +#define TAG_READ_TILE_COLOR   12 +#define TAG_READ_TILE_Z       13 +#define TAG_WRITE_TILE_COLOR  14 +#define TAG_WRITE_TILE_Z      15 +#define TAG_INDEX_BUFFER      16 +#define TAG_BATCH_BUFFER      17 +#define TAG_MISC              18 +#define TAG_DCACHE0           20 +#define TAG_DCACHE1           21 +#define TAG_DCACHE2           22 +#define TAG_DCACHE3           23 +#define TAG_FENCE             24 + + +static INLINE void +wait_on_mask(unsigned tagMask) +{ +   mfc_write_tag_mask( tagMask ); +   /* wait for completion of _any_ DMAs specified by tagMask */ +   mfc_read_tag_status_any(); +} + + +static INLINE void +wait_on_mask_all(unsigned tagMask) +{ +   mfc_write_tag_mask( tagMask ); +   /* wait for completion of _all_ DMAs specified by tagMask */ +   mfc_read_tag_status_all(); +} + + + + + +static INLINE void +memset16(ushort *d, ushort value, uint count) +{ +   uint i; +   for (i = 0; i < count; i++) +      d[i] = value; +} + + +static INLINE void +memset32(uint *d, uint value, uint count) +{ +   uint i; +   for (i = 0; i < count; i++) +      d[i] = value; +} + + +#endif /* SPU_MAIN_H */ diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c new file mode 100644 index 0000000000..eba9f95cf1 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -0,0 +1,631 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \author Brian Paul + */ + + +#include <transpose_matrix4x4.h> +#include "pipe/p_format.h" +#include "spu_main.h" +#include "spu_colorpack.h" +#include "spu_per_fragment_op.h" + + +#define LINEAR_QUAD_LAYOUT 1 + + +static INLINE vector float +spu_min(vector float a, vector float b) +{ +   vector unsigned int m; +   m = spu_cmpgt(a, b);    /* m = a > b ? ~0 : 0 */ +   return spu_sel(a, b, m); +} + + +static INLINE vector float +spu_max(vector float a, vector float b) +{ +   vector unsigned int m; +   m = spu_cmpgt(a, b);    /* m = a > b ? ~0 : 0 */ +   return spu_sel(b, a, m); +} + + +/** + * Called by rasterizer for each quad after the shader has run.  Do + * all the per-fragment operations including alpha test, z test, + * stencil test, blend, colormask and logicops.  This is a + * fallback/debug function.  In reality we'll use a generated function + * produced by the PPU.  But this function is useful for + * debug/validation. 
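+ *
+ * The operations below run in order: alpha test, Z/stencil test, blending,
+ * color packing, colormask, logic ops (still a stub) and finally the
+ * masked write of the quad back into the color tile.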
+ */ +void +spu_fallback_fragment_ops(uint x, uint y, +                          tile_t *colorTile, +                          tile_t *depthStencilTile, +                          vector float fragZ, +                          vector float fragR, +                          vector float fragG, +                          vector float fragB, +                          vector float fragA, +                          vector unsigned int mask) +{ +   vector float frag_aos[4]; +   unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ +   unsigned int fragc0, fragc1, fragc2, fragc3;  /* fragment colors */ + +   /* +    * Do alpha test +    */ +   if (spu.depth_stencil_alpha.alpha.enabled) { +      vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value); +      vector unsigned int amask; + +      switch (spu.depth_stencil_alpha.alpha.func) { +      case PIPE_FUNC_LESS: +         amask = spu_cmpgt(ref, fragA);  /* mask = (fragA < ref) */ +         break; +      case PIPE_FUNC_GREATER: +         amask = spu_cmpgt(fragA, ref);  /* mask = (fragA > ref) */ +         break; +      case PIPE_FUNC_GEQUAL: +         amask = spu_cmpgt(ref, fragA); +         amask = spu_nor(amask, amask); +         break; +      case PIPE_FUNC_LEQUAL: +         amask = spu_cmpgt(fragA, ref); +         amask = spu_nor(amask, amask); +         break; +      case PIPE_FUNC_EQUAL: +         amask = spu_cmpeq(ref, fragA); +         break; +      case PIPE_FUNC_NOTEQUAL: +         amask = spu_cmpeq(ref, fragA); +         amask = spu_nor(amask, amask); +         break; +      case PIPE_FUNC_ALWAYS: +         amask = spu_splats(0xffffffffU); +         break; +      case PIPE_FUNC_NEVER: +         amask = spu_splats( 0x0U); +         break; +      default: +         ; +      } + +      mask = spu_and(mask, amask); +   } + + +   /* +    * Z and/or stencil testing... 
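+    * For the S8Z24 formats handled here the stencil value sits in the top
+    * 8 bits and Z in the low 24 bits; mask24 below splits the combined
+    * value into ifbZ and ifbS accordingly.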
+    */ +   if (spu.depth_stencil_alpha.depth.enabled || +       spu.depth_stencil_alpha.stencil[0].enabled) { + +      /* get four Z/Stencil values from tile */ +      vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); +      vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; +      vector unsigned int ifbZ = spu_and(ifbZS, mask24); +      vector unsigned int ifbS = spu_andc(ifbZS, mask24); + +      if (spu.depth_stencil_alpha.stencil[0].enabled) { +         /* do stencil test */ +         ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); + +      } +      else if (spu.depth_stencil_alpha.depth.enabled) { +         /* do depth test */ + +         ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || +                spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); + +         vector unsigned int ifragZ; +         vector unsigned int zmask; + +         /* convert four fragZ from float to uint */ +         fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); +         ifragZ = spu_convtu(fragZ, 0); + +         /* do depth comparison, setting zmask with results */ +         switch (spu.depth_stencil_alpha.depth.func) { +         case PIPE_FUNC_LESS: +            zmask = spu_cmpgt(ifbZ, ifragZ);  /* mask = (ifragZ < ifbZ) */ +            break; +         case PIPE_FUNC_GREATER: +            zmask = spu_cmpgt(ifragZ, ifbZ);  /* mask = (ifragZ > ifbZ) */ +            break; +         case PIPE_FUNC_GEQUAL: +            zmask = spu_cmpgt(ifbZ, ifragZ); +            zmask = spu_nor(zmask, zmask); +            break; +         case PIPE_FUNC_LEQUAL: +            zmask = spu_cmpgt(ifragZ, ifbZ); +            zmask = spu_nor(zmask, zmask); +            break; +         case PIPE_FUNC_EQUAL: +            zmask = spu_cmpeq(ifbZ, ifragZ); +            break; +         case PIPE_FUNC_NOTEQUAL: +            zmask = spu_cmpeq(ifbZ, ifragZ); +            zmask = spu_nor(zmask, zmask); +            break; +         case PIPE_FUNC_ALWAYS: +            zmask = spu_splats(0xffffffffU); +            break; +         case PIPE_FUNC_NEVER: +            zmask = spu_splats( 0x0U); +            break; +         default: +            ; +         } + +         mask = spu_and(mask, zmask); + +         /* merge framebuffer Z and fragment Z according to the mask */ +         ifbZ = spu_or(spu_and(ifragZ, mask), +                       spu_andc(ifbZ, mask)); +      } + +      if (spu_extract(spu_orx(mask), 0)) { +         /* put new fragment Z/Stencil values back into Z/Stencil tile */ +         depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); + +         spu.cur_ztile_status = TILE_STATUS_DIRTY; +      } +   } + + +   /* +    * If we'll need the current framebuffer/tile colors for blending +    * or logicop or colormask, fetch them now. 
+    */ +   if (spu.blend.blend_enable || +       spu.blend.logicop_enable || +       spu.blend.colormask != 0xf) { + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ +      fbc0 = colorTile->ui[y][x*2+0]; +      fbc1 = colorTile->ui[y][x*2+1]; +      fbc2 = colorTile->ui[y][x*2+2]; +      fbc3 = colorTile->ui[y][x*2+3]; +#else +      fbc0 = colorTile->ui[y+0][x+0]; +      fbc1 = colorTile->ui[y+0][x+1]; +      fbc2 = colorTile->ui[y+1][x+0]; +      fbc3 = colorTile->ui[y+1][x+1]; +#endif +   } + + +   /* +    * Do blending +    */ +   if (spu.blend.blend_enable) { +      /* blending terms, misc regs */ +      vector float term1r, term1g, term1b, term1a; +      vector float term2r, term2g, term2b, term2a; +      vector float one, tmp; + +      vector float fbRGBA[4];  /* current framebuffer colors */ + +      /* convert framebuffer colors from packed int to vector float */ +      { +         vector float temp[4]; /* float colors in AOS form */ +         switch (spu.fb.color_format) { +         case PIPE_FORMAT_B8G8R8A8_UNORM: +            temp[0] = spu_unpack_B8G8R8A8(fbc0); +            temp[1] = spu_unpack_B8G8R8A8(fbc1); +            temp[2] = spu_unpack_B8G8R8A8(fbc2); +            temp[3] = spu_unpack_B8G8R8A8(fbc3); +            break; +         case PIPE_FORMAT_A8R8G8B8_UNORM: +            temp[0] = spu_unpack_A8R8G8B8(fbc0); +            temp[1] = spu_unpack_A8R8G8B8(fbc1); +            temp[2] = spu_unpack_A8R8G8B8(fbc2); +            temp[3] = spu_unpack_A8R8G8B8(fbc3); +            break; +         default: +            ASSERT(0); +         } +         _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ +      } + +      /* +       * Compute Src RGB terms (fragment color * factor) +       */ +      switch (spu.blend.rgb_src_factor) { +      case PIPE_BLENDFACTOR_ONE: +         term1r = fragR; +         term1g = fragG; +         term1b = fragB; +         break; +      case PIPE_BLENDFACTOR_ZERO: +         term1r = +         term1g = +         term1b = spu_splats(0.0f); +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         term1r = spu_mul(fragR, fragR); +         term1g = spu_mul(fragG, fragG); +         term1b = spu_mul(fragB, fragB); +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         term1r = spu_mul(fragR, fragA); +         term1g = spu_mul(fragG, fragA); +         term1b = spu_mul(fragB, fragA); +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         term1r = spu_mul(fragR, fbRGBA[0]); +         term1g = spu_mul(fragG, fbRGBA[1]); +         term1b = spu_mul(fragB, fbRGBA[2]); +         break; +      case PIPE_BLENDFACTOR_DST_ALPHA: +         term1r = spu_mul(fragR, fbRGBA[3]); +         term1g = spu_mul(fragG, fbRGBA[3]); +         term1b = spu_mul(fragB, fbRGBA[3]); +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0])); +         term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1])); +         term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2])); +         break; +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); +         term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3])); +         term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3])); +         break; +      /* XXX more cases */ +      default: +         ASSERT(0); +      } + +      /* +       * Compute Src Alpha term (fragment alpha * factor) +       */ +      switch 
(spu.blend.alpha_src_factor) { +      case PIPE_BLENDFACTOR_ONE: +         term1a = fragA; +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         term1a = spu_splats(0.0f); +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         term1a = spu_mul(fragA, fragA); +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_DST_ALPHA: +         term1a = spu_mul(fragA, fbRGBA[3]); +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         term1a = spu_mul(fragA, spu_splats(spu.blend_color.color[3])); +         break; +      /* XXX more cases */ +      default: +         ASSERT(0); +      } + +      /* +       * Compute Dest RGB terms (framebuffer color * factor) +       */ +      switch (spu.blend.rgb_dst_factor) { +      case PIPE_BLENDFACTOR_ONE: +         term2r = fbRGBA[0]; +         term2g = fbRGBA[1]; +         term2b = fbRGBA[2]; +         break; +      case PIPE_BLENDFACTOR_ZERO: +         term2r = +         term2g = +         term2b = spu_splats(0.0f); +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         term2r = spu_mul(fbRGBA[0], fragR); +         term2g = spu_mul(fbRGBA[1], fragG); +         term2b = spu_mul(fbRGBA[2], fragB); +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         term2r = spu_mul(fbRGBA[0], fragA); +         term2g = spu_mul(fbRGBA[1], fragA); +         term2b = spu_mul(fbRGBA[2], fragA); +         break; +      case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +         one = spu_splats(1.0f); +         tmp = spu_sub(one, fragA); +         term2r = spu_mul(fbRGBA[0], tmp); +         term2g = spu_mul(fbRGBA[1], tmp); +         term2b = spu_mul(fbRGBA[2], tmp); +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         term2r = spu_mul(fbRGBA[0], fbRGBA[0]); +         term2g = spu_mul(fbRGBA[1], fbRGBA[1]); +         term2b = spu_mul(fbRGBA[2], fbRGBA[2]); +         break; +      case PIPE_BLENDFACTOR_DST_ALPHA: +         term2r = spu_mul(fbRGBA[0], fbRGBA[3]); +         term2g = spu_mul(fbRGBA[1], fbRGBA[3]); +         term2b = spu_mul(fbRGBA[2], fbRGBA[3]); +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0])); +         term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1])); +         term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2])); +         break; +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3])); +         term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3])); +         term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3])); +         break; +       /* XXX more cases */ +      default: +         ASSERT(0); +      } + +      /* +       * Compute Dest Alpha term (framebuffer alpha * factor) +       */ +      switch (spu.blend.alpha_dst_factor) { +      case PIPE_BLENDFACTOR_ONE: +         term2a = fbRGBA[3]; +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         term2a = spu_splats(0.0f); +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         term2a = spu_mul(fbRGBA[3], fragA); +         break; +      case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +         one = spu_splats(1.0f); +         tmp = spu_sub(one, fragA); +         term2a = spu_mul(fbRGBA[3], tmp); +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         /* fall-through */ +      case 
PIPE_BLENDFACTOR_DST_ALPHA: +         term2a = spu_mul(fbRGBA[3], fbRGBA[3]); +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3])); +         break; +      /* XXX more cases */ +      default: +         ASSERT(0); +      } + +      /* +       * Combine Src/Dest RGB terms +       */ +      switch (spu.blend.rgb_func) { +      case PIPE_BLEND_ADD: +         fragR = spu_add(term1r, term2r); +         fragG = spu_add(term1g, term2g); +         fragB = spu_add(term1b, term2b); +         break; +      case PIPE_BLEND_SUBTRACT: +         fragR = spu_sub(term1r, term2r); +         fragG = spu_sub(term1g, term2g); +         fragB = spu_sub(term1b, term2b); +         break; +      case PIPE_BLEND_REVERSE_SUBTRACT: +         fragR = spu_sub(term2r, term1r); +         fragG = spu_sub(term2g, term1g); +         fragB = spu_sub(term2b, term1b); +         break; +      case PIPE_BLEND_MIN: +         fragR = spu_min(term1r, term2r); +         fragG = spu_min(term1g, term2g); +         fragB = spu_min(term1b, term2b); +         break; +      case PIPE_BLEND_MAX: +         fragR = spu_max(term1r, term2r); +         fragG = spu_max(term1g, term2g); +         fragB = spu_max(term1b, term2b); +         break; +      default: +         ASSERT(0); +      } + +      /* +       * Combine Src/Dest A term +       */ +      switch (spu.blend.alpha_func) { +      case PIPE_BLEND_ADD: +         fragA = spu_add(term1a, term2a); +         break; +      case PIPE_BLEND_SUBTRACT: +         fragA = spu_sub(term1a, term2a); +         break; +      case PIPE_BLEND_REVERSE_SUBTRACT: +         fragA = spu_sub(term2a, term1a); +         break; +      case PIPE_BLEND_MIN: +         fragA = spu_min(term1a, term2a); +         break; +      case PIPE_BLEND_MAX: +         fragA = spu_max(term1a, term2a); +         break; +      default: +         ASSERT(0); +      } +   } + + +   /* +    * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. +    */ +#if 0 +   /* original code */ +   { +      vector float frag_soa[4]; +      frag_soa[0] = fragR; +      frag_soa[1] = fragG; +      frag_soa[2] = fragB; +      frag_soa[3] = fragA; +      _transpose_matrix4x4(frag_aos, frag_soa); +   } +#else +   /* short-cut relying on function parameter layout: */ +   _transpose_matrix4x4(frag_aos, &fragR); +   (void) fragG; +   (void) fragB; +#endif + +   /* +    * Pack fragment float colors into 32-bit RGBA words. 
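+    * After the transpose above, frag_aos[i] holds the RGBA color of pixel i
+    * of the quad, so each pack call below produces one 32-bit pixel.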
+    */ +   switch (spu.fb.color_format) { +   case PIPE_FORMAT_A8R8G8B8_UNORM: +      fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); +      fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); +      fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); +      fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); +      break; +   case PIPE_FORMAT_B8G8R8A8_UNORM: +      fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); +      fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); +      fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); +      fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); +      break; +   default: +      fprintf(stderr, "SPU: Bad pixel format in spu_fallback_fragment_ops\n"); +      ASSERT(0); +   } + + +   /* +    * Do color masking +    */ +   if (spu.blend.colormask != 0xf) { +      uint cmask = 0x0; /* each byte corresponds to a color channel */ + +      /* Form bitmask depending on color buffer format and colormask bits */ +      switch (spu.fb.color_format) { +      case PIPE_FORMAT_A8R8G8B8_UNORM: +         if (spu.blend.colormask & PIPE_MASK_R) +            cmask |= 0x00ff0000; /* red */ +         if (spu.blend.colormask & PIPE_MASK_G) +            cmask |= 0x0000ff00; /* green */ +         if (spu.blend.colormask & PIPE_MASK_B) +            cmask |= 0x000000ff; /* blue */ +         if (spu.blend.colormask & PIPE_MASK_A) +            cmask |= 0xff000000; /* alpha */ +         break; +      case PIPE_FORMAT_B8G8R8A8_UNORM: +         if (spu.blend.colormask & PIPE_MASK_R) +            cmask |= 0x0000ff00; /* red */ +         if (spu.blend.colormask & PIPE_MASK_G) +            cmask |= 0x00ff0000; /* green */ +         if (spu.blend.colormask & PIPE_MASK_B) +            cmask |= 0xff000000; /* blue */ +         if (spu.blend.colormask & PIPE_MASK_A) +            cmask |= 0x000000ff; /* alpha */ +         break; +      default: +         ASSERT(0); +      } + +      /* +       * Apply color mask to the 32-bit packed colors. +       * if (cmask[i]) +       *    frag color[i] = frag color[i]; +       * else +       *    frag color[i] = framebuffer color[i]; +       */ +      fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); +      fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); +      fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); +      fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); +   } + + +   /* +    * Do logic ops +    */ +   if (spu.blend.logicop_enable) { +      /* XXX to do */ +      /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ +   } + + +   /* +    * If mask is non-zero, mark tile as dirty. +    */ +   if (spu_extract(spu_orx(mask), 0)) { +      spu.cur_ctile_status = TILE_STATUS_DIRTY; +   } +   else { +      /* write no fragments */ +      return; +   } + + +   /* +    * Write new fragment/quad colors to the framebuffer/tile. +    * Only write pixels where the corresponding mask word is set. +    */ +#if LINEAR_QUAD_LAYOUT +   /* +    * Quad layout: +    *  +--+--+--+--+ +    *  |p0|p1|p2|p3|... +    *  +--+--+--+--+ +    */ +   if (spu_extract(mask, 0)) +      colorTile->ui[y][x*2] = fragc0; +   if (spu_extract(mask, 1)) +      colorTile->ui[y][x*2+1] = fragc1; +   if (spu_extract(mask, 2)) +      colorTile->ui[y][x*2+2] = fragc2; +   if (spu_extract(mask, 3)) +      colorTile->ui[y][x*2+3] = fragc3; +#else +   /* +    * Quad layout: +    *  +--+--+ +    *  |p0|p1|... +    *  +--+--+ +    *  |p2|p3|... 
+    *  +--+--+ +    */ +   if (spu_extract(mask, 0)) +      colorTile->ui[y+0][x+0] = fragc0; +   if (spu_extract(mask, 1)) +      colorTile->ui[y+0][x+1] = fragc1; +   if (spu_extract(mask, 2)) +      colorTile->ui[y+1][x+0] = fragc2; +   if (spu_extract(mask, 3)) +      colorTile->ui[y+1][x+1] = fragc3; +#endif +} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h new file mode 100644 index 0000000000..f817abf046 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -0,0 +1,44 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SPU_PER_FRAGMENT_OP +#define SPU_PER_FRAGMENT_OP + + +extern void +spu_fallback_fragment_ops(uint x, uint y, +                          tile_t *colorTile, +                          tile_t *depthStencilTile, +                          vector float fragZ, +                          vector float fragRed, +                          vector float fragGreen, +                          vector float fragBlue, +                          vector float fragAlpha, +                          vector unsigned int mask); + + +#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c new file mode 100644 index 0000000000..7c225e2f27 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -0,0 +1,295 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include <stdio.h> +#include <libmisc.h> +#include <spu_mfcio.h> + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "cell/common.h" +#include "util/u_memory.h" + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, +                  uint *txmin, uint *tymin, +                  uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 +   /* Debug: full-window bounding box */ +   uint txmax = spu.fb.width_tiles - 1; +   uint tymax = spu.fb.height_tiles - 1; +   *txmin = 0; +   *tymin = 0; +   *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; +   *box_width_tiles = spu.fb.width_tiles; +   (void) render; +   (void) txmax; +   (void) tymax; +#else +   uint txmax, tymax, box_height_tiles; + +   *txmin = (uint) render->xmin / TILE_SIZE; +   *tymin = (uint) render->ymin / TILE_SIZE; +   txmax = (uint) render->xmax / TILE_SIZE; +   tymax = (uint) render->ymax / TILE_SIZE; +   if (txmax >= spu.fb.width_tiles) +      txmax = spu.fb.width_tiles-1; +   if (tymax >= spu.fb.height_tiles) +      tymax = spu.fb.height_tiles-1; +   *box_width_tiles = txmax - *txmin + 1; +   box_height_tiles = tymax - *tymin + 1; +   *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 +   printf("SPU %u: bounds: %g, %g  ...  %g, %g\n", spu.init.id, +          render->xmin, render->ymin, render->xmax, render->ymax); +   printf("SPU %u: tiles:  %u, %u .. 
%u, %u\n", +           spu.init.id, *txmin, *tymin, txmax, tymax); +   ASSERT(render->xmin <= render->xmax); +   ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ +   return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ +   if (spu.read_depth_stencil) { +      if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { +         //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); +         get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); +         spu.cur_ztile_status = TILE_STATUS_GETTING; +      } +   } + +   if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { +      //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); +      get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); +      spu.cur_ctile_status = TILE_STATUS_GETTING; +   } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ +   if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { +      /* tile was modified and needs to be written back */ +      //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); +      put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); +      spu.cur_ztile_status = TILE_STATUS_DEFINED; +   } +   else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { +      /* tile was never used */ +      spu.cur_ztile_status = TILE_STATUS_DEFINED; +      //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); +   } + +   if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { +      /* tile was modified and needs to be written back */ +      //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); +      put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); +      spu.cur_ctile_status = TILE_STATUS_DEFINED; +   } +   else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { +      /* tile was never used */ +      spu.cur_ctile_status = TILE_STATUS_DEFINED; +      //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); +   } +} + + +/** + * Wait for 'put' of color/z tiles to complete. 
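+ * This blocks on the MFC DMA tag groups used by put_cz_tiles() above, so
+ * the local tile buffers can safely be reused for the next tile.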
+ */ +static INLINE void +wait_put_cz_tiles(void) +{ +   wait_on_mask(1 << TAG_WRITE_TILE_COLOR); +   if (spu.read_depth_stencil) { +      wait_on_mask(1 << TAG_WRITE_TILE_Z); +   } +} + + +/** + * Render primitives + * \param pos_incr  returns value indicating how may words to skip after + *                  this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ +   /* we'll DMA into these buffers */ +   ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; +   const uint vertex_size = render->vertex_size; /* in bytes */ +   /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; +   uint index_bytes; +   const ubyte *vertices; +   const ushort *indexes; +   uint i, j; +   uint num_tiles; + +   D_PRINTF(CELL_DEBUG_CMD, +            "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", +            render->prim_type, +            render->num_verts, +            render->num_indexes, +            render->inline_verts); + +   ASSERT(sizeof(*render) % 4 == 0); +   ASSERT(total_vertex_bytes % 16 == 0); +   ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); +   ASSERT(render->num_indexes % 3 == 0); + + +   /* indexes are right after the render command in the batch buffer */ +   indexes = (const ushort *) (render + 1); +   index_bytes = ROUNDUP8(render->num_indexes * 2); +   *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + +   if (render->inline_verts) { +      /* Vertices are after indexes in batch buffer at next 16-byte addr */ +      vertices = (const ubyte *) render + (*pos_incr * 8); +      vertices = (const ubyte *) align_pointer((void *) vertices, 16); +      ASSERT_ALIGN16(vertices); +      *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; +   } +   else { +      /* Begin DMA fetch of vertex buffer */ +      ubyte *src = spu.init.buffers[render->vertex_buf]; +      ubyte *dest = vertex_data; + +      /* skip vertex data we won't use */ +#if 01 +      src += render->min_index * vertex_size; +      dest += render->min_index * vertex_size; +      total_vertex_bytes -= render->min_index * vertex_size; +#endif +      ASSERT(total_vertex_bytes % 16 == 0); +      ASSERT_ALIGN16(dest); +      ASSERT_ALIGN16(src); + +      mfc_get(dest,   /* in vertex_data[] array */ +              (unsigned int) src,  /* src in main memory */ +              total_vertex_bytes,  /* size */ +              TAG_VERTEX_BUFFER, +              0, /* tid */ +              0  /* rid */); + +      vertices = vertex_data; + +      wait_on_mask(1 << TAG_VERTEX_BUFFER); +   } + + +   /** +    ** find tiles which intersect the prim bounding box +    **/ +   uint txmin, tymin, box_width_tiles, box_num_tiles; +   tile_bounding_box(render, &txmin, &tymin, +                     &box_num_tiles, &box_width_tiles); + + +   /* make sure any pending clears have completed */ +   wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + +   num_tiles = 0; + +   /** +    ** loop over tiles, rendering tris +    **/ +   for (i = 0; i < box_num_tiles; i++) { +      const uint tx = txmin + i % box_width_tiles; +      const uint ty = tymin + i / box_width_tiles; + +      ASSERT(tx < spu.fb.width_tiles); +      ASSERT(ty < spu.fb.height_tiles); + +      if (!my_tile(tx, ty)) +         continue; + +      num_tiles++; + +      spu.cur_ctile_status = spu.ctile_status[ty][tx]; +      spu.cur_ztile_status = spu.ztile_status[ty][tx]; + +      get_cz_tiles(tx, ty); + +      uint drawn = 0; + +      /* loop over tris */ +      for (j = 0; j < 
render->num_indexes; j += 3) { +         const float *v0, *v1, *v2; + +         v0 = (const float *) (vertices + indexes[j+0] * vertex_size); +         v1 = (const float *) (vertices + indexes[j+1] * vertex_size); +         v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + +         drawn += tri_draw(v0, v1, v2, tx, ty); +      } + +      //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + +      /* write color/z tiles back to main framebuffer, if dirtied */ +      put_cz_tiles(tx, ty); + +      wait_put_cz_tiles(); /* XXX seems unnecessary... */ + +      spu.ctile_status[ty][tx] = spu.cur_ctile_status; +      spu.ztile_status[ty][tx] = spu.cur_ztile_status; +   } + +   D_PRINTF(CELL_DEBUG_CMD, +            "RENDER done (%u tiles hit)\n", +            num_tiles); +} diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h new file mode 100644 index 0000000000..493434f087 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -0,0 +1,38 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef SPU_RENDER_H +#define SPU_RENDER_H + +#include "cell/common.h" + +extern void +cmd_render(const struct cell_command_render *render, uint *pos_incr); + +#endif /* SPU_RENDER_H */ + diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h new file mode 100644 index 0000000000..74f2a0b6d2 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_shuffle.h @@ -0,0 +1,186 @@ +#ifndef SPU_SHUFFLE_H +#define SPU_SHUFFLE_H + +/* + * Generate shuffle patterns with minimal fuss. 
+ * + * Based on ideas from  + * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf + * + * A-P indicates 0-15th position in first vector + * a-p indicates 0-15th position in second vector + * + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * |          A|          B|          C|          D| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |    A|    B|    C|    D|    E|    F|    G|    H| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * + * x or X indicates 0xff + * 8 indicates 0x80 + * 0 indicates 0x00 + * + * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector  + * unsigned char literal suitable for use with spu_shuffle(). + * + * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector  + * literal suitable for use with si_shufb(). + * + * + * For example : + * SHUFB4(A,A,A,A) + * expands to : + * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}) + *  + * SHUFFLE8(A,B,a,b,C,c,8,8) + * expands to : + * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13, + *				 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0}) + * + */ + +#include <spu_intrinsics.h> + +#define SHUFFLE_PATTERN_4_A__  0x00, 0x01, 0x02, 0x03 +#define SHUFFLE_PATTERN_4_B__  0x04, 0x05, 0x06, 0x07 +#define SHUFFLE_PATTERN_4_C__  0x08, 0x09, 0x0a, 0x0b +#define SHUFFLE_PATTERN_4_D__  0x0c, 0x0d, 0x0e, 0x0f +#define SHUFFLE_PATTERN_4_a__  0x10, 0x11, 0x12, 0x13 +#define SHUFFLE_PATTERN_4_b__  0x14, 0x15, 0x16, 0x17 +#define SHUFFLE_PATTERN_4_c__  0x18, 0x19, 0x1a, 0x1b +#define SHUFFLE_PATTERN_4_d__  0x1c, 0x1d, 0x1e, 0x1f +#define SHUFFLE_PATTERN_4_X__  0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_x__  0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_0__  0x80, 0x80, 0x80, 0x80 +#define SHUFFLE_PATTERN_4_8__  0xe0, 0xe0, 0xe0, 0xe0 + +#define SHUFFLE_VECTOR_4__(A, B, C, D) \ +   SHUFFLE_PATTERN_4_##A##__, \ +   SHUFFLE_PATTERN_4_##B##__, \ +   SHUFFLE_PATTERN_4_##C##__, \ +   SHUFFLE_PATTERN_4_##D##__ + +#define SHUFFLE4(A, B, C, D) \ +   ((const vector unsigned char){ \ +      SHUFFLE_VECTOR_4__(A, B, C, D) \ +   }) + +#define SHUFB4(A, B, C, D) \ +   ((const qword){ \ +      SHUFFLE_VECTOR_4__(A, B, C, D) \ +   }) + + +#define SHUFFLE_PATTERN_8_A__  0x00, 0x01 +#define SHUFFLE_PATTERN_8_B__  0x02, 0x03 +#define SHUFFLE_PATTERN_8_C__  0x04, 0x05 +#define SHUFFLE_PATTERN_8_D__  0x06, 0x07 +#define SHUFFLE_PATTERN_8_E__  0x08, 0x09 +#define SHUFFLE_PATTERN_8_F__  0x0a, 0x0b +#define SHUFFLE_PATTERN_8_G__  0x0c, 0x0d +#define SHUFFLE_PATTERN_8_H__  0x0e, 0x0f +#define SHUFFLE_PATTERN_8_a__  0x10, 0x11 +#define SHUFFLE_PATTERN_8_b__  0x12, 0x13 +#define SHUFFLE_PATTERN_8_c__  0x14, 0x15 +#define SHUFFLE_PATTERN_8_d__  0x16, 0x17 +#define SHUFFLE_PATTERN_8_e__  0x18, 0x19 +#define SHUFFLE_PATTERN_8_f__  0x1a, 0x1b +#define SHUFFLE_PATTERN_8_g__  0x1c, 0x1d +#define SHUFFLE_PATTERN_8_h__  0x1e, 0x1f +#define SHUFFLE_PATTERN_8_X__  0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_x__  0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_0__  0x80, 0x80 +#define SHUFFLE_PATTERN_8_8__  0xe0, 0xe0 + + +#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ +   SHUFFLE_PATTERN_8_##A##__, \ +   SHUFFLE_PATTERN_8_##B##__, \ +   SHUFFLE_PATTERN_8_##C##__, \ +   SHUFFLE_PATTERN_8_##D##__, \ +   SHUFFLE_PATTERN_8_##E##__, \ +   SHUFFLE_PATTERN_8_##F##__, \ +   
SHUFFLE_PATTERN_8_##G##__, \ +   SHUFFLE_PATTERN_8_##H##__ + +#define SHUFFLE8(A, B, C, D, E, F, G, H) \ +   ((const vector unsigned char){ \ +      SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ +   }) + +#define SHUFB8(A, B, C, D, E, F, G, H) \ +   ((const qword){ \ +      SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ +   }) + + +#define SHUFFLE_PATTERN_16_A__  0x00 +#define SHUFFLE_PATTERN_16_B__  0x01 +#define SHUFFLE_PATTERN_16_C__  0x02 +#define SHUFFLE_PATTERN_16_D__  0x03 +#define SHUFFLE_PATTERN_16_E__  0x04 +#define SHUFFLE_PATTERN_16_F__  0x05 +#define SHUFFLE_PATTERN_16_G__  0x06 +#define SHUFFLE_PATTERN_16_H__  0x07 +#define SHUFFLE_PATTERN_16_I__  0x08 +#define SHUFFLE_PATTERN_16_J__  0x09 +#define SHUFFLE_PATTERN_16_K__  0x0a +#define SHUFFLE_PATTERN_16_L__  0x0b +#define SHUFFLE_PATTERN_16_M__  0x0c +#define SHUFFLE_PATTERN_16_N__  0x0d +#define SHUFFLE_PATTERN_16_O__  0x0e +#define SHUFFLE_PATTERN_16_P__  0x0f +#define SHUFFLE_PATTERN_16_a__  0x10 +#define SHUFFLE_PATTERN_16_b__  0x11 +#define SHUFFLE_PATTERN_16_c__  0x12 +#define SHUFFLE_PATTERN_16_d__  0x13 +#define SHUFFLE_PATTERN_16_e__  0x14 +#define SHUFFLE_PATTERN_16_f__  0x15 +#define SHUFFLE_PATTERN_16_g__  0x16 +#define SHUFFLE_PATTERN_16_h__  0x17 +#define SHUFFLE_PATTERN_16_i__  0x18 +#define SHUFFLE_PATTERN_16_j__  0x19 +#define SHUFFLE_PATTERN_16_k__  0x1a +#define SHUFFLE_PATTERN_16_l__  0x1b +#define SHUFFLE_PATTERN_16_m__  0x1c +#define SHUFFLE_PATTERN_16_n__  0x1d +#define SHUFFLE_PATTERN_16_o__  0x1e +#define SHUFFLE_PATTERN_16_p__  0x1f +#define SHUFFLE_PATTERN_16_X__  0xc0 +#define SHUFFLE_PATTERN_16_x__  0xc0 +#define SHUFFLE_PATTERN_16_0__  0x80 +#define SHUFFLE_PATTERN_16_8__  0xe0 + +#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ +   SHUFFLE_PATTERN_16_##A##__, \ +   SHUFFLE_PATTERN_16_##B##__, \ +   SHUFFLE_PATTERN_16_##C##__, \ +   SHUFFLE_PATTERN_16_##D##__, \ +   SHUFFLE_PATTERN_16_##E##__, \ +   SHUFFLE_PATTERN_16_##F##__, \ +   SHUFFLE_PATTERN_16_##G##__, \ +   SHUFFLE_PATTERN_16_##H##__, \ +   SHUFFLE_PATTERN_16_##I##__, \ +   SHUFFLE_PATTERN_16_##J##__, \ +   SHUFFLE_PATTERN_16_##K##__, \ +   SHUFFLE_PATTERN_16_##L##__, \ +   SHUFFLE_PATTERN_16_##M##__, \ +   SHUFFLE_PATTERN_16_##N##__, \ +   SHUFFLE_PATTERN_16_##O##__, \ +   SHUFFLE_PATTERN_16_##P##__ + +#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ +   ((const vector unsigned char){ \ +      SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ +   }) + +#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ +   ((const qword){ \ +      SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ +   }) + +#endif diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c new file mode 100644 index 0000000000..69784c8978 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -0,0 +1,641 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include <math.h> + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_colorpack.h" +#include "spu_dcache.h" + + +/** + * Mark all tex cache entries as invalid. + */ +void +invalidate_tex_cache(void) +{ +   uint lvl; +   for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { +      uint unit = 0; +      uint bytes = 4 * spu.texture[unit].level[lvl].width +         * spu.texture[unit].level[lvl].height; + +      if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) +         bytes *= 6; +      else if (spu.texture[unit].target == PIPE_TEXTURE_3D) +         bytes *= spu.texture[unit].level[lvl].depth; + +      spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); +   } +} + + +/** + * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... + * + * NOTE: in the typical case of bilinear filtering, the four texels + * are in a 2x2 group so we could get by with just two dcache fetches + * (two side-by-side texels per fetch).  But when bilinear filtering + * wraps around a texture edge, we'll probably need code like we have + * now. + * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, + * it's quite likely that the four pixels in a quad will need some of the + * same texels.  So look into doing texture fetches for four pixels at + * a time. 
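+ *
+ * Texture images are laid out as 32x32-texel tiles, so the byte offset of
+ * texel (x,y) is the tile's offset, (y/32 * tiles_per_row + x/32) *
+ * sizeof(tile_t), plus ((y & 31) * 32 + (x & 31)) * 4 within the tile.
+ * For example, texel (37,70) falls in tile (1,2) at intra-tile position
+ * (5,6), i.e. (6*32 + 5) * 4 = 788 bytes into that tile.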
+ */ +static void +get_four_texels(const struct spu_texture_level *tlevel, uint face, +                vec_int4 x, vec_int4 y, +                vec_uint4 *texels) +{ +   unsigned texture_ea = (uintptr_t) tlevel->start; +   const vec_int4 tile_x = spu_rlmask(x, -5);  /* tile_x = x / 32 */ +   const vec_int4 tile_y = spu_rlmask(y, -5);  /* tile_y = y / 32 */ +   const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ +   const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ + +   const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); +   const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); + +   qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); +   tile_offset = si_mpy((qword) tile_offset, tile_size); + +   qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); +   texel_offset = si_mpyui(texel_offset, 4); +    +   vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); +    +   texture_ea = texture_ea + face * tlevel->bytes_per_image; + +   spu_dcache_fetch_unaligned((qword *) & texels[0], +                              texture_ea + spu_extract(offset, 0), 4); +   spu_dcache_fetch_unaligned((qword *) & texels[1], +                              texture_ea + spu_extract(offset, 1), 4); +   spu_dcache_fetch_unaligned((qword *) & texels[2], +                              texture_ea + spu_extract(offset, 2), 4); +   spu_dcache_fetch_unaligned((qword *) & texels[3], +                              texture_ea + spu_extract(offset, 3), 4); +} + + +/** clamp vec to [0, max] */ +static INLINE vector signed int +spu_clamp(vector signed int vec, vector signed int max) +{ +   static const vector signed int zero = {0,0,0,0}; +   vector unsigned int c; +   c = spu_cmpgt(vec, zero);    /* c = vec > zero ? ~0 : 0 */ +   vec = spu_sel(zero, vec, c); +   c = spu_cmpgt(vec, max);    /* c = vec > max ? ~0 : 0 */ +   vec = spu_sel(vec, max, c); +   return vec; +} + + + +/** + * Do nearest texture sampling for four pixels. + * \param colors  returned colors in SOA format (rrrr, gggg, bbbb, aaaa). + */ +void +sample_texture_2d_nearest(vector float s, vector float t, +                          uint unit, uint level, uint face, +                          vector float colors[4]) +{ +   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; +   vector float ss = spu_mul(s, tlevel->scale_s); +   vector float tt = spu_mul(t, tlevel->scale_t); +   vector signed int is = spu_convts(ss, 0); +   vector signed int it = spu_convts(tt, 0); +   vec_uint4 texels[4]; + +   /* PIPE_TEX_WRAP_REPEAT */ +   is = spu_and(is, tlevel->mask_s); +   it = spu_and(it, tlevel->mask_t); + +   /* PIPE_TEX_WRAP_CLAMP */ +   is = spu_clamp(is, tlevel->max_s); +   it = spu_clamp(it, tlevel->max_t); + +   get_four_texels(tlevel, face, is, it, texels); + +   /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ +   spu_unpack_A8R8G8B8_transpose4(texels, colors); +} + + +/** + * Do bilinear texture sampling for four pixels. + * \param colors  returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
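+ *
+ * The four neighboring texels are combined with the usual bilinear weights;
+ * with (fs,ft) the fractional texel coordinates computed below:
+ *   color = (1-fs)(1-ft)*ul + fs(1-ft)*ur + (1-fs)ft*ll + fs*ft*lr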
+ */ +void +sample_texture_2d_bilinear(vector float s, vector float t, +                           uint unit, uint level, uint face, +                           vector float colors[4]) +{ +   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; +   static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + +   vector float ss = spu_madd(s, tlevel->scale_s, half); +   vector float tt = spu_madd(t, tlevel->scale_t, half); + +   vector signed int is0 = spu_convts(ss, 0); +   vector signed int it0 = spu_convts(tt, 0); + +   /* is + 1, it + 1 */ +   vector signed int is1 = spu_add(is0, 1); +   vector signed int it1 = spu_add(it0, 1); + +   /* PIPE_TEX_WRAP_REPEAT */ +   is0 = spu_and(is0, tlevel->mask_s); +   it0 = spu_and(it0, tlevel->mask_t); +   is1 = spu_and(is1, tlevel->mask_s); +   it1 = spu_and(it1, tlevel->mask_t); + +   /* PIPE_TEX_WRAP_CLAMP */ +   is0 = spu_clamp(is0, tlevel->max_s); +   it0 = spu_clamp(it0, tlevel->max_t); +   is1 = spu_clamp(is1, tlevel->max_s); +   it1 = spu_clamp(it1, tlevel->max_t); + +   /* get packed int texels */ +   vector unsigned int texels[16]; +   get_four_texels(tlevel, face, is0, it0, texels + 0);  /* upper-left */ +   get_four_texels(tlevel, face, is1, it0, texels + 4);  /* upper-right */ +   get_four_texels(tlevel, face, is0, it1, texels + 8);  /* lower-left */ +   get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ + +   /* convert packed int texels to float colors */ +   vector float ftexels[16]; +   spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); +   spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); +   spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); +   spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); + +   /* Compute weighting factors in [0,1] +    * Multiply texcoord by 1024, AND with 1023, convert back to float. 
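+    * For example, ss = 3.25 gives 3.25 * 1024 = 3328, 3328 & 1023 = 256,
+    * and 256/1024 = 0.25, i.e. the fractional distance to the next texel.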
+    */ +   vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); +   vector signed int iss1024 = spu_convts(ss1024, 0); +   iss1024 = spu_and(iss1024, 1023); +   vector float sWeights0 = spu_convtf(iss1024, 10); + +   vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); +   vector signed int itt1024 = spu_convts(tt1024, 0); +   itt1024 = spu_and(itt1024, 1023); +   vector float tWeights0 = spu_convtf(itt1024, 10); + +   /* 1 - sWeight and 1 - tWeight */ +   vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); +   vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); + +   /* reds, for four pixels */ +   ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ +   ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ +   ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ +   ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ +   colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), +                       spu_add(ftexels[8], ftexels[12])); + +   /* greens, for four pixels */ +   ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ +   ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ +   ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ +   ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/ +   colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), +                       spu_add(ftexels[9], ftexels[13])); + +   /* blues, for four pixels */ +   ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ +   ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ +   ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ +   ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ +   colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), +                       spu_add(ftexels[10], ftexels[14])); + +   /* alphas, for four pixels */ +   ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ +   ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ +   ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ +   ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ +   colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), +                       spu_add(ftexels[11], ftexels[15])); +} + + + +/** + * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h + */ +static INLINE void +transpose(vector unsigned int *mOut0, +          vector unsigned int *mOut1, +          vector unsigned int *mOut2, +          vector unsigned int *mOut3, +          vector unsigned int *mIn) +{ +  vector unsigned int abcd, efgh, ijkl, mnop;	/* input vectors */ +  vector unsigned int aeim, bfjn, cgko, dhlp;	/* output vectors */ +  vector unsigned int aibj, ckdl, emfn, gohp;	/* intermediate vectors */ + +  vector unsigned char shufflehi = ((vector unsigned char) { +					       0x00, 0x01, 0x02, 0x03, +					       0x10, 0x11, 0x12, 0x13, +					       0x04, 0x05, 0x06, 0x07, +					       0x14, 0x15, 0x16, 0x17}); +  vector unsigned char shufflelo = ((vector unsigned char) { +					       0x08, 0x09, 0x0A, 0x0B, +					       0x18, 0x19, 0x1A, 0x1B, +					       0x0C, 0x0D, 0x0E, 0x0F, +					       0x1C, 0x1D, 0x1E, 0x1F}); +  abcd = *(mIn+0); +  efgh = *(mIn+1); +  ijkl = *(mIn+2); +  mnop = *(mIn+3); + +  aibj = spu_shuffle(abcd, ijkl, shufflehi); +  ckdl = spu_shuffle(abcd, ijkl, 
shufflelo); +  emfn = spu_shuffle(efgh, mnop, shufflehi); +  gohp = spu_shuffle(efgh, mnop, shufflelo); + +  aeim = spu_shuffle(aibj, emfn, shufflehi); +  bfjn = spu_shuffle(aibj, emfn, shufflelo); +  cgko = spu_shuffle(ckdl, gohp, shufflehi); +  dhlp = spu_shuffle(ckdl, gohp, shufflelo); + +  *mOut0 = aeim; +  *mOut1 = bfjn; +  *mOut2 = cgko; +  *mOut3 = dhlp; +} + + +/** + * Bilinear filtering, using int instead of float arithmetic for computing + * sample weights. + */ +void +sample_texture_2d_bilinear_int(vector float s, vector float t, +                               uint unit, uint level, uint face, +                               vector float colors[4]) +{ +   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; +   static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + +   /* Scale texcoords by size of texture, and add half pixel bias */ +   vector float ss = spu_madd(s, tlevel->scale_s, half); +   vector float tt = spu_madd(t, tlevel->scale_t, half); + +   /* convert float coords to fixed-pt coords with 7 fraction bits */ +   vector signed int is = spu_convts(ss, 7);  /* XXX really need floor() here */ +   vector signed int it = spu_convts(tt, 7);  /* XXX really need floor() here */ + +   /* compute integer texel weights in [0, 127] */ +   vector signed int sWeights0 = spu_and(is, 127); +   vector signed int tWeights0 = spu_and(it, 127); +   vector signed int sWeights1 = spu_sub(127, sWeights0); +   vector signed int tWeights1 = spu_sub(127, tWeights0); + +   /* texel coords: is0 = is / 128, it0 = is / 128 */ +   vector signed int is0 = spu_rlmask(is, -7); +   vector signed int it0 = spu_rlmask(it, -7); + +   /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ +   vector signed int is1 = spu_add(is0, 1); +   vector signed int it1 = spu_add(it0, 1); + +   /* PIPE_TEX_WRAP_REPEAT */ +   is0 = spu_and(is0, tlevel->mask_s); +   it0 = spu_and(it0, tlevel->mask_t); +   is1 = spu_and(is1, tlevel->mask_s); +   it1 = spu_and(it1, tlevel->mask_t); + +   /* PIPE_TEX_WRAP_CLAMP */ +   is0 = spu_clamp(is0, tlevel->max_s); +   it0 = spu_clamp(it0, tlevel->max_t); +   is1 = spu_clamp(is1, tlevel->max_s); +   it1 = spu_clamp(it1, tlevel->max_t); + +   /* get packed int texels */ +   vector unsigned int texels[16]; +   get_four_texels(tlevel, face, is0, it0, texels + 0);  /* upper-left */ +   get_four_texels(tlevel, face, is1, it0, texels + 4);  /* upper-right */ +   get_four_texels(tlevel, face, is0, it1, texels + 8);  /* lower-left */ +   get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ + +   /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ +   { +      static const unsigned char ZERO = 0x80; +      int i; +      for (i = 0; i < 16; i++) { +         texels[i] = spu_shuffle(texels[i], texels[i], +                                 ((vector unsigned char) { +                                    ZERO, ZERO, ZERO, 1, +                                    ZERO, ZERO, ZERO, 2, +                                    ZERO, ZERO, ZERO, 3, +                                    ZERO, ZERO, ZERO, 0})); +      } +   } + +   /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ +   vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7, +      texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15; +   transpose(&texel0, &texel1, &texel2, &texel3, texels + 0); +   transpose(&texel4, &texel5, &texel6, &texel7, texels + 4); +   transpose(&texel8, &texel9, &texel10, &texel11, texels + 8); +   
transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
+
+   /* compute weighted colors */
+   vector unsigned int c0, c1, c2, c3, cSum;
+
+   /* red */
+   c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[0] = spu_convtf(cSum, 22);
+
+   /* green */
+   c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[1] = spu_convtf(cSum, 22);
+
+   /* blue */
+   c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[2] = spu_convtf(cSum, 22);
+
+   /* alpha */
+   c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[3] = spu_convtf(cSum, 22);
+}
+
+
+
+/**
+ * Compute level of detail factor from texcoords.
+ */
+static INLINE float
+compute_lambda_2d(uint unit, vector float s, vector float t)
+{
+   uint baseLevel = 0;
+   float width = spu.texture[unit].level[baseLevel].width;
+   float height = spu.texture[unit].level[baseLevel].height;
+   float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
+   float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
+   float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
+   float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
+#if 0
+   /* ideal value */
+   float x = dsdx * dsdx + dtdx * dtdx;
+   float y = dsdy * dsdy + dtdy * dtdy;
+   float rho = x > y ? x : y;
+   rho = sqrtf(rho);
+#else
+   /* approximation */
+   dsdx = fabsf(dsdx);
+   dsdy = fabsf(dsdy);
+   dtdx = fabsf(dtdx);
+   dtdy = fabsf(dtdy);
+   float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
+#endif
+   float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
+   return lambda;
+}
+
+
+/**
+ * Blend two sets of colors according to weight.
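+ * This is a plain linear interpolation, c0' = c0 + (c1 - c0) * weight,
+ * applied per channel to the four-pixel SOA color vectors.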
+ */ +static void +blend_colors(vector float c0[4], const vector float c1[4], float weight) +{ +   vector float t = spu_splats(weight); +   vector float dc0 = spu_sub(c1[0], c0[0]); +   vector float dc1 = spu_sub(c1[1], c0[1]); +   vector float dc2 = spu_sub(c1[2], c0[2]); +   vector float dc3 = spu_sub(c1[3], c0[3]); +   c0[0] = spu_madd(dc0, t, c0[0]); +   c0[1] = spu_madd(dc1, t, c0[1]); +   c0[2] = spu_madd(dc2, t, c0[2]); +   c0[3] = spu_madd(dc3, t, c0[3]); +} + + +/** + * Texture sampling with level of detail selection and possibly mipmap + * interpolation. + */ +void +sample_texture_2d_lod(vector float s, vector float t, +                      uint unit, uint level_ignored, uint face, +                      vector float colors[4]) +{ +   /* +    * Note that we're computing a lambda/lod here that's used for all +    * four pixels in the quad. +    */ +   float lambda = compute_lambda_2d(unit, s, t); + +   (void) face; +   (void) level_ignored; + +   /* apply lod bias */ +   lambda += spu.sampler[unit].lod_bias; + +   /* clamp */ +   if (lambda < spu.sampler[unit].min_lod) +      lambda = spu.sampler[unit].min_lod; +   else if (lambda > spu.sampler[unit].max_lod) +      lambda = spu.sampler[unit].max_lod; + +   if (lambda <= 0.0f) { +      /* magnify */ +      spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors); +   } +   else { +      /* minify */ +      if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { +         /* sample two mipmap levels and interpolate */ +         int level = (int) lambda; +         if (level > (int) spu.texture[unit].max_level) +            level = spu.texture[unit].max_level; +         spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); +         if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { +            /* sample second mipmap level */ +            float weight = lambda - (float) level; +            level++; +            if (level <= (int) spu.texture[unit].max_level) { +               vector float colors2[4]; +               spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2); +               blend_colors(colors, colors2, weight); +            } +         } +      } +      else { +         /* sample one mipmap level */ +         int level = (int) (lambda + 0.5f); +         if (level > (int) spu.texture[unit].max_level) +            level = spu.texture[unit].max_level; +         spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); +      } +   } +} + + +/** XXX need a SIMD version of this */ +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ +   /* +      major axis +      direction     target                             sc     tc    ma +      ----------    -------------------------------    ---    ---   --- +       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx +       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx +       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry +       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry +       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz +       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz +   */ +   const float arx = fabsf(rx); +   const float ary = fabsf(ry); +   const float arz = fabsf(rz); +   unsigned face; +   float sc, tc, ma; + +   if (arx > ary && arx > arz) { +      if (rx >= 0.0F) { +         face = PIPE_TEX_FACE_POS_X; +         sc = -rz; +         tc = -ry; +         ma = arx; 
+      } +      else { +         face = PIPE_TEX_FACE_NEG_X; +         sc = rz; +         tc = -ry; +         ma = arx; +      } +   } +   else if (ary > arx && ary > arz) { +      if (ry >= 0.0F) { +         face = PIPE_TEX_FACE_POS_Y; +         sc = rx; +         tc = rz; +         ma = ary; +      } +      else { +         face = PIPE_TEX_FACE_NEG_Y; +         sc = rx; +         tc = -rz; +         ma = ary; +      } +   } +   else { +      if (rz > 0.0F) { +         face = PIPE_TEX_FACE_POS_Z; +         sc = rx; +         tc = -ry; +         ma = arz; +      } +      else { +         face = PIPE_TEX_FACE_NEG_Z; +         sc = -rx; +         tc = -ry; +         ma = arz; +      } +   } + +   *newS = (sc / ma + 1.0F) * 0.5F; +   *newT = (tc / ma + 1.0F) * 0.5F; + +   return face; +} + + + +void +sample_texture_cube(vector float s, vector float t, vector float r, +                    uint unit, vector float colors[4]) +{ +   uint p, faces[4], level = 0; +   float newS[4], newT[4]; + +   /* Compute cube faces referenced by the four sets of texcoords. +    * XXX we should SIMD-ize this. +    */ +   for (p = 0; p < 4; p++) {       +      float rx = spu_extract(s, p); +      float ry = spu_extract(t, p); +      float rz = spu_extract(r, p); +      faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); +   } + +   if (faces[0] == faces[1] && +       faces[0] == faces[2] && +       faces[0] == faces[3]) { +      /* GOOD!  All four texcoords refer to the same cube face */ +      s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; +      t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; +      spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors); +   } +   else { +      /* BAD!  The four texcoords refer to different faces */ +      for (p = 0; p < 4; p++) {       +         vector float c[4]; + +         spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]), +                                     unit, level, faces[p], c); + +         float red = spu_extract(c[0], p); +         float green = spu_extract(c[1], p); +         float blue = spu_extract(c[2], p); +         float alpha = spu_extract(c[3], p); + +         colors[0] = spu_insert(red,   colors[0], p); +         colors[1] = spu_insert(green, colors[1], p); +         colors[2] = spu_insert(blue,  colors[2], p); +         colors[3] = spu_insert(alpha, colors[3], p); +      } +   } +} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h new file mode 100644 index 0000000000..7b75b007b5 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -0,0 +1,67 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SPU_TEXTURE_H +#define SPU_TEXTURE_H + + +#include "pipe/p_compiler.h" + + +extern void +invalidate_tex_cache(void); + + +extern void +sample_texture_2d_nearest(vector float s, vector float t, +                          uint unit, uint level, uint face, +                          vector float colors[4]); + + +extern void +sample_texture_2d_bilinear(vector float s, vector float t, +                           uint unit, uint level, uint face, +                           vector float colors[4]); + +extern void +sample_texture_2d_bilinear_int(vector float s, vector float t, +                               uint unit, uint level, uint face, +                               vector float colors[4]); + + +extern void +sample_texture_2d_lod(vector float s, vector float t, +                      uint unit, uint level, uint face, +                      vector float colors[4]); + + +extern void +sample_texture_cube(vector float s, vector float t, vector float r, +                    uint unit, vector float colors[4]); + + +#endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c new file mode 100644 index 0000000000..6905015a48 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -0,0 +1,126 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + + +#include "spu_tile.h" +#include "spu_main.h" + + +/** + * Get tile of color or Z values from main memory, put into SPU memory. 
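+ * Tiles are stored contiguously in row-major order, so the source address
+ * is (ty * width_tiles + tx) * bytesPerTile past the start of the color
+ * (or depth) buffer; the whole tile is fetched with one tagged mfc_get()
+ * so callers can overlap the DMA with other work.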
+ */
+void
+get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
+{
+   const uint offset = ty * spu.fb.width_tiles + tx;
+   const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
+   const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start;
+
+   src += offset * bytesPerTile;
+
+   ASSERT(tx < spu.fb.width_tiles);
+   ASSERT(ty < spu.fb.height_tiles);
+   ASSERT_ALIGN16(tile);
+   /*
+   printf("get_tile:  dest: %p  src: 0x%x  size: %d\n",
+          tile, (unsigned int) src, bytesPerTile);
+   */
+   mfc_get(tile->ui,  /* dest in local memory */
+           (unsigned int) src, /* src in main memory */
+           bytesPerTile,
+           tag,
+           0, /* tid */
+           0  /* rid */);
+}
+
+
+/**
+ * Move tile of color or Z values from SPU memory to main memory.
+ */
+void
+put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
+{
+   const uint offset = ty * spu.fb.width_tiles + tx;
+   const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
+   ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start;
+
+   dst += offset * bytesPerTile;
+
+   ASSERT(tx < spu.fb.width_tiles);
+   ASSERT(ty < spu.fb.height_tiles);
+   ASSERT_ALIGN16(tile);
+   /*
+   printf("SPU %u: put_tile:  src: %p  dst: 0x%x  size: %d\n",
+          spu.init.id,
+          tile, (unsigned int) dst, bytesPerTile);
+   */
+   mfc_put((void *) tile->ui,  /* src in local memory */
+           (unsigned int) dst,  /* dst in main memory */
+           bytesPerTile,
+           tag,
+           0, /* tid */
+           0  /* rid */);
+}
+
+
+/**
+ * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
+ * tiles back to the main framebuffer.
+ */
+void
+really_clear_tiles(uint surfaceIndex)
+{
+   const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+   uint i;
+
+   if (surfaceIndex == 0) {
+      clear_c_tile(&spu.ctile);
+
+      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+         uint tx = i % spu.fb.width_tiles;
+         uint ty = i / spu.fb.width_tiles;
+         if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
+            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+         }
+      }
+   }
+   else {
+      clear_z_tile(&spu.ztile);
+
+      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+         uint tx = i % spu.fb.width_tiles;
+         uint ty = i / spu.fb.width_tiles;
+         if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
+            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+      }
+   }
+
+#if 0
+   wait_on_mask(1 << TAG_SURFACE_CLEAR);
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
new file mode 100644
index 0000000000..7bfb52be8f
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tile.h
@@ -0,0 +1,75 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SPU_TILE_H +#define SPU_TILE_H + + +#include <libmisc.h> +#include <spu_mfcio.h> +#include "spu_main.h" +#include "cell/common.h" + + + +extern void +get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); + +extern void +put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); + +extern void +really_clear_tiles(uint surfaceIndex); + + +static INLINE void +clear_c_tile(tile_t *ctile) +{ +   memset32((uint*) ctile->ui, +            spu.fb.color_clear_value, +            TILE_SIZE * TILE_SIZE); +} + + +static INLINE void +clear_z_tile(tile_t *ztile) +{ +   if (spu.fb.zsize == 2) { +      memset16((ushort*) ztile->us, +               spu.fb.depth_clear_value, +               TILE_SIZE * TILE_SIZE); +   } +   else { +      ASSERT(spu.fb.zsize != 0); +      memset32((uint*) ztile->ui, +               spu.fb.depth_clear_value, +               TILE_SIZE * TILE_SIZE); +   } +} + + +#endif /* SPU_TILE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c new file mode 100644 index 0000000000..0d9fcb9997 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -0,0 +1,809 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * Triangle rendering within a tile. + */ + +#include <transpose_matrix4x4.h> +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "util/u_math.h" +#include "spu_colorpack.h" +#include "spu_main.h" +#include "spu_shuffle.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_tri.h" + + +/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ +typedef vector unsigned int mask_t; + + + +/** + * Simplified types taken from other parts of Gallium + */ +struct vertex_header { +   vector float data[1]; +}; + + + +/* XXX fix this */ +#undef CEILF +#define CEILF(X) ((float) (int) ((X) + 0.99999f)) + + +#define QUAD_TOP_LEFT     0 +#define QUAD_TOP_RIGHT    1 +#define QUAD_BOTTOM_LEFT  2 +#define QUAD_BOTTOM_RIGHT 3 +#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL          0xf + + +#define DEBUG_VERTS 0 + +/** + * Triangle edge info + */ +struct edge { +   union { +      struct { +         float dx;	/**< X(v1) - X(v0), used only during setup */ +         float dy;	/**< Y(v1) - Y(v0), used only during setup */ +      }; +      vec_float4 ds;    /**< vector accessor for dx and dy */ +   }; +   float dxdy;		/**< dx/dy */ +   float sx, sy;	/**< first sample point coord */ +   int lines;		/**< number of lines on this edge */ +}; + + +struct interp_coef +{ +   vector float a0; +   vector float dadx; +   vector float dady; +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + +   /* Vertices are just an array of floats making up each attribute in +    * turn.  Currently fixed at 4 floats, but should change in time. +    * Codegen will help cope with this. +    */ +   union { +      struct { +         const struct vertex_header *vmin; +         const struct vertex_header *vmid; +         const struct vertex_header *vmax; +         const struct vertex_header *vprovoke; +      }; +      qword vertex_headers; +   }; + +   struct edge ebot; +   struct edge etop; +   struct edge emaj; + +   float oneOverArea;  /* XXX maybe make into vector? */ + +   uint facing; + +   uint tx, ty;  /**< position of current tile (x, y) */ + +   int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; + +   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + +   struct { +      vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */ +      int y; +      unsigned y_flags; +      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ +   } span; +}; + + +static struct setup_stage setup; + + +/** + * Evaluate attribute coefficients (plane equations) to compute + * attribute values for the four fragments in a quad. + * Eg: four colors will be computed (in AoS format). 
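+ *
+ * For linear interpolants each value comes from the plane equation
+ *   a(x,y) = a0 + dadx * x + dady * y
+ * evaluated at the quad's top-left pixel; the other three pixels just add
+ * dadx and/or dady.  Perspective interpolants are additionally scaled by
+ * 1/w.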
+ */ +static INLINE void +eval_coeff(uint slot, float x, float y, vector float w, vector float result[4]) +{ +   switch (spu.vertex_info.attrib[slot].interp_mode) { +   case INTERP_CONSTANT: +      result[QUAD_TOP_LEFT] = +      result[QUAD_TOP_RIGHT] = +      result[QUAD_BOTTOM_LEFT] = +      result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0; +      break; +   case INTERP_LINEAR: +      { +         vector float dadx = setup.coef[slot].dadx; +         vector float dady = setup.coef[slot].dady; +         vector float topLeft = +            spu_add(setup.coef[slot].a0, +                    spu_add(spu_mul(spu_splats(x), dadx), +                            spu_mul(spu_splats(y), dady))); + +         result[QUAD_TOP_LEFT] = topLeft; +         result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); +         result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); +         result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); +      } +      break; +   case INTERP_PERSPECTIVE: +      { +         vector float dadx = setup.coef[slot].dadx; +         vector float dady = setup.coef[slot].dady; +         vector float topLeft = +            spu_add(setup.coef[slot].a0, +                    spu_add(spu_mul(spu_splats(x), dadx), +                            spu_mul(spu_splats(y), dady))); + +         vector float wInv = spu_re(w);  /* 1.0 / w */ + +         result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv); +         result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv); +         result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv); +         result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv); +      } +      break; +   case INTERP_POS: +   case INTERP_NONE: +      break; +   default: +      ASSERT(0); +   } +} + + +/** + * As above, but return 4 vectors in SOA format. + * XXX this will all be re-written someday. + */ +static INLINE void +eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4]) +{ +   eval_coeff(slot, x, y, w, result); +   _transpose_matrix4x4(result, result); +} + + +/** Evalute coefficients to get Z for four pixels in a quad */ +static INLINE vector float +eval_z(float x, float y) +{ +   const uint slot = 0; +   const float dzdx = spu_extract(setup.coef[slot].dadx, 2); +   const float dzdy = spu_extract(setup.coef[slot].dady, 2); +   const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy; +   const vector float topLeftv = spu_splats(topLeft); +   const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; +   return spu_add(topLeftv, derivs); +} + + +/** Evalute coefficients to get W for four pixels in a quad */ +static INLINE vector float +eval_w(float x, float y) +{ +   const uint slot = 0; +   const float dwdx = spu_extract(setup.coef[slot].dadx, 3); +   const float dwdy = spu_extract(setup.coef[slot].dady, 3); +   const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy; +   const vector float topLeftv = spu_splats(topLeft); +   const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy }; +   return spu_add(topLeftv, derivs); +} + + +/** + * Emit a quad (pass to next stage).  No clipping is done. + * Note: about 1/5 to 1/7 of the time, mask is zero and this function + * should be skipped.  But adding the test for that slows things down + * overall. + */ +static INLINE void +emit_quad( int x, int y, mask_t mask) +{ +   /* If any bits in mask are set... 
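+    * (spu_orx() ORs the four mask words into element 0, so the single
+    * spu_extract() below tests all four pixels of the quad at once.)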
*/ +   if (spu_extract(spu_orx(mask), 0)) { +      const int ix = x - setup.cliprect_minx; +      const int iy = y - setup.cliprect_miny; + +      spu.cur_ctile_status = TILE_STATUS_DIRTY; +      spu.cur_ztile_status = TILE_STATUS_DIRTY; + +      { +         /* +          * Run fragment shader, execute per-fragment ops, update fb/tile. +          */ +         vector float inputs[4*4], outputs[2*4]; +         vector float fragZ = eval_z((float) x, (float) y); +         vector float fragW = eval_w((float) x, (float) y); +         vector unsigned int kill_mask; + +         /* setup inputs */ +#if 0 +         eval_coeff_soa(1, (float) x, (float) y, fragW, inputs); +#else +         uint i; +         for (i = 0; i < spu.vertex_info.num_attribs; i++) { +            eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4); +         } +#endif +         ASSERT(spu.fragment_program); +         ASSERT(spu.fragment_ops); + +         /* Execute the current fragment program */ +         kill_mask = spu.fragment_program(inputs, outputs, spu.constants); + +         mask = spu_andc(mask, kill_mask); + +         /* Execute per-fragment/quad operations, including: +          * alpha test, z test, stencil test, blend and framebuffer writing. +          * Note that there are two different fragment operations functions +          * that can be called, one for front-facing fragments, and one +          * for back-facing fragments.  (Often the two are the same; +          * but in some cases, like two-sided stenciling, they can be +          * very different.)  So choose the correct function depending +          * on the calculated facing. +          */ +         spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile, +                          fragZ, +                          outputs[0*4+0], +                          outputs[0*4+1], +                          outputs[0*4+2], +                          outputs[0*4+3], +                          mask); +      } +   } +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int +block(int x) +{ +   return x & ~1; +} + + +/** + * Render a horizontal span of quads + */ +static void +flush_spans(void) +{ +   int minleft, maxright; + +   const int l0 = spu_extract(setup.span.quad, 0); +   const int l1 = spu_extract(setup.span.quad, 1); +   const int r0 = spu_extract(setup.span.quad, 2); +   const int r1 = spu_extract(setup.span.quad, 3); + +   switch (setup.span.y_flags) { +   case 0x3: +      /* both odd and even lines written (both quad rows) */ +      minleft = MIN2(l0, l1); +      maxright = MAX2(r0, r1); +      break; + +   case 0x1: +      /* only even line written (quad top row) */ +      minleft = l0; +      maxright = r0; +      break; + +   case 0x2: +      /* only odd line written (quad bottom row) */ +      minleft = l1; +      maxright = r1; +      break; + +   default: +      return; +   } + +   /* OK, we're very likely to need the tile data now. +    * clear or finish waiting if needed. 
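+    * (Status transitions handled below: GETTING -> wait for the DMA,
+    * then CLEAN; CLEAR -> clear the tile in place, then DIRTY.)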
+    */ +   if (spu.cur_ctile_status == TILE_STATUS_GETTING) { +      /* wait for mfc_get() to complete */ +      //printf("SPU: %u: waiting for ctile\n", spu.init.id); +      wait_on_mask(1 << TAG_READ_TILE_COLOR); +      spu.cur_ctile_status = TILE_STATUS_CLEAN; +   } +   else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { +      //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); +      clear_c_tile(&spu.ctile); +      spu.cur_ctile_status = TILE_STATUS_DIRTY; +   } +   ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); + +   if (spu.read_depth_stencil) { +      if (spu.cur_ztile_status == TILE_STATUS_GETTING) { +         /* wait for mfc_get() to complete */ +         //printf("SPU: %u: waiting for ztile\n", spu.init.id); +         wait_on_mask(1 << TAG_READ_TILE_Z); +         spu.cur_ztile_status = TILE_STATUS_CLEAN; +      } +      else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { +         //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); +         clear_z_tile(&spu.ztile); +         spu.cur_ztile_status = TILE_STATUS_DIRTY; +      } +      ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); +   } + +   /* XXX this loop could be moved into the above switch cases... */ +    +   /* Setup for mask calculation */ +   const vec_int4 quad_LlRr = setup.span.quad; +   const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8); +   const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B)); +   const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B)); + +   const vec_int4 twos = spu_splats(2); + +   const int x = block(minleft); +   vec_int4 xs = {x, x+1, x, x+1}; + +   for (; spu_extract(xs, 0) <= block(maxright); xs += twos) { +      /** +       * Computes mask to indicate which pixels in the 2x2 quad are actually +       * inside the triangle's bounds. +       */ +       +      /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */ +      const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs); +      const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);  +       +      /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */ +      const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs); + +      /* Combine results to create mask */ +      const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs); + +      emit_quad(spu_extract(xs, 0), setup.span.y, mask); +   } + +   setup.span.y = 0; +   setup.span.y_flags = 0; +   /* Zero right elements */ +   setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); +} + + +#if DEBUG_VERTS +static void +print_vertex(const struct vertex_header *v) +{ +   uint i; +   fprintf(stderr, "  Vertex: (%p)\n", v); +   for (i = 0; i < spu.vertex_info.num_attribs; i++) { +      fprintf(stderr, "    %d: %f %f %f %f\n",  i,  +              spu_extract(v->data[i], 0), +              spu_extract(v->data[i], 1), +              spu_extract(v->data[i], 2), +              spu_extract(v->data[i], 3)); +   } +} +#endif + + +/** + * Sort vertices from top to bottom. + * Compute area and determine front vs. back facing. 
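+ * (The sort is branchless: the three y comparisons are packed into a
+ * 4-bit spu_gather() value that indexes a table of shuffle patterns.)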
+ * Do coarse clip test against tile bounds + * \return  FALSE if tri is totally outside tile, TRUE otherwise + */ +static boolean +setup_sort_vertices(const struct vertex_header *v0, +                    const struct vertex_header *v1, +                    const struct vertex_header *v2) +{ +   float area, sign; + +#if DEBUG_VERTS +   if (spu.init.id==0) { +      fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); +      print_vertex(v0); +      print_vertex(v1); +      print_vertex(v2); +   } +#endif + +   /* determine bottom to top order of vertices */ +   { +      /* A table of shuffle patterns for putting vertex_header pointers into +         correct order.  Quite magical. */ +      const vec_uchar16 sort_order_patterns[] = { +         SHUFFLE4(A,B,C,C), +         SHUFFLE4(C,A,B,C), +         SHUFFLE4(A,C,B,C), +         SHUFFLE4(B,C,A,C), +         SHUFFLE4(B,A,C,C), +         SHUFFLE4(C,B,A,C) }; + +      /* The vertex_header pointers, packed for easy shuffling later */ +      const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2}; + +      /* Collate y values into two vectors for comparison. +         Using only one shuffle constant! ;) */ +      const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C)); +      const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C)); +      const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C)); +      const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C)); + +      /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ +      const vec_uint4 compare = spu_cmpgt(y_012, y_120); +      /* Compress the result of the comparison into 4 bits */ +      const vec_uint4 gather = spu_gather(compare); +      /* Subtract one to attain the index into the LUT.  Magical. */ +      const unsigned int index = spu_extract(gather, 0) - 1; + +      /* Load the appropriate pattern and construct the desired vector. */ +      setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]); + +      /* Using the result of the comparison, set sign. +         Very magical. */ +      sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f); +   } + +   /* Check if triangle is completely outside the tile bounds */ +   if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) +      return FALSE; +   if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) +      return FALSE; +   if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && +       spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && +       spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) +      return FALSE; +   if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && +       spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && +       spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) +      return FALSE; + +   setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); +   setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); +   setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); + +   /* +    * Compute triangle's area.  Use 1/area to compute partial +    * derivatives of attributes later. +    */ +   area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; + +   setup.oneOverArea = 1.0f / area; + +   /* The product of area * sign indicates front/back orientation (0/1). 
+    * Just in case someone gets the bright idea of switching the front
+    * and back constants without noticing that we're assuming their
+    * values in this operation, also assert that the values are
+    * what we think they are.
+    */
+   ASSERT(CELL_FACING_FRONT == 0);
+   ASSERT(CELL_FACING_BACK == 1);
+   setup.facing = (area * sign > 0.0f)
+      ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
+
+   return TRUE;
+}
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ * The value comes from the provoking vertex's data[slot].
+ * The result will be put into setup.coef[slot].a0.
+ * \param slot  which attribute slot
+ */
+static INLINE void
+const_coeff4(uint slot)
+{
+   setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
+   setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
+   setup.coef[slot].a0 = setup.vprovoke->data[slot];
+}
+
+
+/**
+ * As above, but set up linear interpolation (a0, dadx, dady) for all
+ * four vector components.
+ */
+static INLINE void
+tri_linear_coeff4(uint slot)
+{
+   const vector float vmin_d = setup.vmin->data[slot];
+   const vector float vmid_d = setup.vmid->data[slot];
+   const vector float vmax_d = setup.vmax->data[slot];
+   const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
+   const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
+
+   vector float botda = vmid_d - vmin_d;
+   vector float majda = vmax_d - vmin_d;
+
+   vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
+                            spu_mul(botda, spu_splats(setup.emaj.dy)));
+   vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
+                            spu_mul(majda, spu_splats(setup.ebot.dx)));
+
+   setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
+   setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
+
+   vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
+   vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
+
+   setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
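+ * Roughly, per channel:
+ *
+ *    a0, dadx, dady are fitted to (attr * 1/w) at the three vertices,
+ *    and eval_coeff()'s INTERP_PERSPECTIVE case undoes the 1/w factor
+ *    per fragment to recover the perspective-correct attribute value.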
+ */ +static void +tri_persp_coeff4(uint slot) +{ +   const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); +   const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + +   const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); +   const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); +   const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); + +   vector float vmin_d = setup.vmin->data[slot]; +   vector float vmid_d = setup.vmid->data[slot]; +   vector float vmax_d = setup.vmax->data[slot]; + +   vmin_d = spu_mul(vmin_d, vmin_w); +   vmid_d = spu_mul(vmid_d, vmid_w); +   vmax_d = spu_mul(vmax_d, vmax_w); + +   vector float botda = vmid_d - vmin_d; +   vector float majda = vmax_d - vmin_d; + +   vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), +                            spu_mul(botda, spu_splats(setup.emaj.dy))); +   vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), +                            spu_mul(majda, spu_splats(setup.ebot.dx))); + +   setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); +   setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); + +   vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); +   vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); +                          +   setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + + +/** + * Compute the setup.coef[] array dadx, dady, a0 values. + * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. + */ +static void +setup_tri_coefficients(void) +{ +   uint i; + +   for (i = 0; i < spu.vertex_info.num_attribs; i++) { +      switch (spu.vertex_info.attrib[i].interp_mode) { +      case INTERP_NONE: +         break; +      case INTERP_CONSTANT: +         const_coeff4(i); +         break; +      case INTERP_POS: +         /* fall-through */ +      case INTERP_LINEAR: +         tri_linear_coeff4(i); +         break; +      case INTERP_PERSPECTIVE: +         tri_persp_coeff4(i); +         break; +      default: +         ASSERT(0); +      } +   } +} + + +static void +setup_tri_edges(void) +{ +   float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; +   float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; + +   float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; +   float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; +   float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; + +   setup.emaj.sy = CEILF(vmin_y); +   setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); +   setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; +   setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; + +   setup.etop.sy = CEILF(vmid_y); +   setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); +   setup.etop.dxdy = setup.etop.dx / setup.etop.dy; +   setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; + +   setup.ebot.sy = CEILF(vmin_y); +   setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); +   setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; +   setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. 
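+ * Each scanline's span is computed directly as
+ *
+ *    left  = eleft->sx  + y * eleft->dxdy
+ *    right = eright->sx + y * eright->dxdy
+ *
+ * (multiplying instead of accumulating, to avoid float drift; see the
+ * comment in the loop below).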
+ */ +static void +subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) +{ +   const int minx = setup.cliprect_minx; +   const int maxx = setup.cliprect_maxx; +   const int miny = setup.cliprect_miny; +   const int maxy = setup.cliprect_maxy; +   int y, start_y, finish_y; +   int sy = (int)eleft->sy; + +   ASSERT((int)eleft->sy == (int) eright->sy); + +   /* clip top/bottom */ +   start_y = sy; +   finish_y = sy + lines; + +   if (start_y < miny) +      start_y = miny; + +   if (finish_y > maxy) +      finish_y = maxy; + +   start_y -= sy; +   finish_y -= sy; + +   /* +   _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);   +   */ + +   for (y = start_y; y < finish_y; y++) { + +      /* avoid accumulating adds as floats don't have the precision to +       * accurately iterate large triangle edges that way.  luckily we +       * can just multiply these days. +       * +       * this is all drowned out by the attribute interpolation anyway. +       */ +      int left = (int)(eleft->sx + y * eleft->dxdy); +      int right = (int)(eright->sx + y * eright->dxdy); + +      /* clip left/right */ +      if (left < minx) +         left = minx; +      if (right > maxx) +         right = maxx; + +      if (left < right) { +         int _y = sy + y; +         if (block(_y) != setup.span.y) { +            flush_spans(); +            setup.span.y = block(_y); +         } + +         int offset = _y&1; +         vec_int4 quad_LlRr = {left, left, right, right}; +         /* Store left and right in 0 or 1 row of quad based on offset */ +         setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset)); +         setup.span.y_flags |= 1<<offset; +      } +   } + + +   /* save the values so that emaj can be restarted: +    */ +   eleft->sx += lines * eleft->dxdy; +   eright->sx += lines * eright->dxdy; +   eleft->sy += lines; +   eright->sy += lines; +} + + +/** + * Draw triangle into tile at (tx, ty) (tile coords) + * The tile data should have already been fetched. 
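+ * Overall flow: clamp the cliprect to this tile, sort the vertices,
+ * compute interpolation coefficients and edges, then rasterize the two
+ * subtriangles (ebot/emaj and etop/emaj) as 2x2 quads via flush_spans().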
+ */ +boolean +tri_draw(const float *v0, const float *v1, const float *v2, +         uint tx, uint ty) +{ +   setup.tx = tx; +   setup.ty = ty; + +   /* set clipping bounds to tile bounds */ +   setup.cliprect_minx = tx * TILE_SIZE; +   setup.cliprect_miny = ty * TILE_SIZE; +   setup.cliprect_maxx = (tx + 1) * TILE_SIZE; +   setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + +   if (!setup_sort_vertices((struct vertex_header *) v0, +                            (struct vertex_header *) v1, +                            (struct vertex_header *) v2)) { +      return FALSE; /* totally clipped */ +   } + +   setup_tri_coefficients(); +   setup_tri_edges(); + +   setup.span.y = 0; +   setup.span.y_flags = 0; +   /* Zero right elements */ +   setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); + +   if (setup.oneOverArea < 0.0) { +      /* emaj on left */ +      subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); +      subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); +   } +   else { +      /* emaj on right */ +      subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); +      subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); +   } + +   flush_spans(); + +   return TRUE; +} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h new file mode 100644 index 0000000000..aa694dd7c9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -0,0 +1,37 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#ifndef SPU_TRI_H +#define SPU_TRI_H + + +extern boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); + + +#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c new file mode 100644 index 0000000000..b8a0d4a265 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -0,0 +1,167 @@ + +#include "cell/common.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" +#include "tgsi/tgsi_parse.h" +//#include "tgsi_build.h" +#include "tgsi/tgsi_util.h" + +unsigned +tgsi_util_get_src_register_swizzle( +   const struct tgsi_src_register *reg, +   unsigned component ) +{ +   switch( component ) { +   case 0: +      return reg->SwizzleX; +   case 1: +      return reg->SwizzleY; +   case 2: +      return reg->SwizzleZ; +   case 3: +      return reg->SwizzleW; +   default: +      ASSERT( 0 ); +   } +   return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( +   const struct tgsi_src_register_ext_swz *reg, +   unsigned component ) +{ +   switch( component ) { +   case 0: +      return reg->ExtSwizzleX; +   case 1: +      return reg->ExtSwizzleY; +   case 2: +      return reg->ExtSwizzleZ; +   case 3: +      return reg->ExtSwizzleW; +   default: +      ASSERT( 0 ); +   } +   return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( +   const struct tgsi_full_src_register  *reg, +   unsigned component ) +{ +   unsigned swizzle; + +   /* +    * First, calculate  the   extended swizzle for a given channel. This will give +    * us either a channel index into the simple swizzle or  a constant 1 or   0. +    */ +   swizzle = tgsi_util_get_src_register_extswizzle( +      ®->SrcRegisterExtSwz, +      component ); + +   ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); +   ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); +   ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); +   ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); +   ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); +   ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + +   /* +    * Second, calculate the simple  swizzle  for   the   unswizzled channel index. +    * Leave the constants intact, they are   not   affected by the   simple swizzle. +    */ +   if( swizzle <= TGSI_SWIZZLE_W ) { +      swizzle = tgsi_util_get_src_register_swizzle( +         ®->SrcRegister, +         component ); +   } + +   return swizzle; +} + +unsigned +tgsi_util_get_src_register_extnegate( +   const  struct tgsi_src_register_ext_swz *reg, +   unsigned component ) +{ +   switch( component ) { +   case 0: +      return reg->NegateX; +   case 1: +      return reg->NegateY; +   case 2: +      return reg->NegateZ; +   case 3: +      return reg->NegateW; +   default: +      ASSERT( 0 ); +   } +   return 0; +} + +void +tgsi_util_set_src_register_extnegate( +   struct tgsi_src_register_ext_swz *reg, +   unsigned negate, +   unsigned component ) +{ +   switch( component ) { +   case 0: +      reg->NegateX = negate; +      break; +   case 1: +      reg->NegateY = negate; +      break; +   case 2: +      reg->NegateZ = negate; +      break; +   case 3: +      reg->NegateW = negate; +      break; +   default: +      ASSERT( 0 ); +   } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( +   const struct  tgsi_full_src_register *reg, +   unsigned component ) +{ +   unsigned sign_mode; + +   if( reg->SrcRegisterExtMod.Absolute ) { +      /* Consider only the post-abs negation. 
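+       * abs() has already discarded any earlier negations, so the result
+       * here is either SIGN_SET or SIGN_CLEAR.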
*/ + +      if( reg->SrcRegisterExtMod.Negate ) { +         sign_mode = TGSI_UTIL_SIGN_SET; +      } +      else { +         sign_mode = TGSI_UTIL_SIGN_CLEAR; +      } +   } +   else { +      /* Accumulate the three negations. */ + +      unsigned negate; + +      negate = reg->SrcRegister.Negate; +      if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { +         negate = !negate; +      } +      if( reg->SrcRegisterExtMod.Negate ) { +         negate = !negate; +      } + +      if( negate ) { +         sign_mode = TGSI_UTIL_SIGN_TOGGLE; +      } +      else { +         sign_mode = TGSI_UTIL_SIGN_KEEP; +      } +   } + +   return sign_mode; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c new file mode 100644 index 0000000000..03375d84a5 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -0,0 +1,145 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  *   Ian Romanick <idr@us.ibm.com> +  */ + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_exec.h" +#include "spu_vertex_shader.h" +#include "spu_main.h" +#include "spu_dcache.h" + +typedef void (*spu_fetch_func)(qword *out, const qword *in, +			       const qword *shuffle_data); + + +static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { +   /* Shuffle used by CVT_64_FLOAT +    */ +   { +      0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, +      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, +   }, + +   /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED +    */ +   { +      0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, +      0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, +   }, +    +   /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED +    */ +   { +      0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, +      0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, +   }, +    +   /* High value shuffle used by trans4x4. 
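+    * ({A0,B0,A1,B1}: interleaves the first two words of the two input
+    * qwords; the low shuffle below interleaves words 2 and 3.)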
+    */ +   { +      0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, +      0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 +   }, + +   /* Low value shuffle used by trans4x4. +    */ +   { +      0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, +      0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F +   } +}; + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_vertex_fetch(struct spu_vs_context *draw, +                                 struct spu_exec_machine *machine, +                                 const unsigned *elts, +                                 unsigned count) +{ +   unsigned nr_attrs = draw->vertex_fetch.nr_attrs; +   unsigned attr; + +   ASSERT(count <= 4); + +#if DRAW_DBG +   printf("SPU: %s count = %u, nr_attrs = %u\n",  +          __FUNCTION__, count, nr_attrs); +#endif + +   /* loop over vertex attributes (vertex shader inputs) +    */ +   for (attr = 0; attr < nr_attrs; attr++) { +      const unsigned pitch = draw->vertex_fetch.pitch[attr]; +      const uint64_t src = draw->vertex_fetch.src_ptr[attr]; +      const spu_fetch_func fetch = (spu_fetch_func) +	  (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); +      unsigned i; +      unsigned idx; +      const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; +      const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; +      qword in[2 * 4] ALIGN16_ATTRIB; + + +      /* Fetch four attributes for four vertices.   +       */ +      idx = 0; +      for (i = 0; i < count; i++) { +         const uint64_t addr = src + (elts[i] * pitch); + +#if DRAW_DBG +         printf("SPU: fetching = 0x%llx\n", addr); +#endif + +         spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); +         idx += quads_per_entry; +      } + +      /* Be nice and zero out any missing vertices. +       */ +      (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); + + +      /* Convert all 4 vertices to vectors of float. +       */ +      (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); +   } +} + + +void spu_update_vertex_fetch( struct spu_vs_context *draw ) +{ +   draw->vertex_fetch.fetch_func = generic_vertex_fetch; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c new file mode 100644 index 0000000000..fbe5b34d39 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -0,0 +1,244 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  *   Brian Paul +  *   Ian Romanick <idr@us.ibm.com> +  */ + +#include <spu_mfcio.h> + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" +#include "cell/common.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" +#include "spu_main.h" + + +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +#define CLIP_RIGHT_BIT 0x01 +#define CLIP_LEFT_BIT 0x02 +#define CLIP_TOP_BIT 0x04 +#define CLIP_BOTTOM_BIT 0x08 +#define CLIP_FAR_BIT 0x10 +#define CLIP_NEAR_BIT 0x20 + + +static INLINE float +dot4(const float *a, const float *b) +{ +   return (a[0]*b[0] + +           a[1]*b[1] + +           a[2]*b[2] + +           a[3]*b[3]); +} + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ +   unsigned mask = 0; +   unsigned i; + +   /* Do the hardwired planes first: +    */ +   if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; +   if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; +   if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; +   if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; +   if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; +   if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + +   /* Followed by any remaining ones: +    */ +   for (i = 6; i < nr; i++) { +      if (dot4(clip, plane[i]) < 0)  +         mask |= (1<<i); +   } + +   return mask; +} + + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. 
+ * \param vbuffer  the input vertex data + * \param elts  indexes of four input vertices + * \param count  number of vertices to shade [1..4] + * \param vOut  array of pointers to four output vertices + */ +static void +run_vertex_program(struct spu_vs_context *draw, +                   unsigned elts[4], unsigned count, +                   const uint64_t *vOut) +{ +   struct spu_exec_machine *machine = &draw->machine; +   unsigned int j; + +   ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); +   ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); +   const float *scale = draw->viewport.scale; +   const float *trans = draw->viewport.translate; + +   ASSERT(count <= 4); + +   machine->Processor = TGSI_PROCESSOR_VERTEX; + +   ASSERT_ALIGN16(draw->constants); +   machine->Consts = (float (*)[4]) draw->constants; + +   machine->Inputs = ALIGN16_ASSIGN(inputs); +   machine->Outputs = ALIGN16_ASSIGN(outputs); + +   spu_vertex_fetch( draw, machine, elts, count ); + +   /* run shader */ +   spu_exec_machine_run( machine ); + + +   /* store machine results */ +   for (j = 0; j < count; j++) { +      unsigned slot; +      float x, y, z, w; +      unsigned char buffer[sizeof(struct vertex_header) +          + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; +      struct vertex_header *const tmpOut = +          (struct vertex_header *) buffer; +      const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) +                                           + (sizeof(float) * 4  +                                              * draw->num_vs_outputs)); + +      mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); +      wait_on_mask(1 << TAG_VERTEX_BUFFER); + + +      /* Handle attr[0] (position) specially: +       * +       * XXX: Computing the clipmask should be done in the vertex +       * program as a set of DP4 instructions appended to the +       * user-provided code. +       */ +      x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; +      y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; +      z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; +      w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + +      tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, +					   draw->nr_planes); +      tmpOut->edgeflag = 1; + +      /* divide by w */ +      w = 1.0f / w; +      x *= w; +      y *= w; +      z *= w; + +      /* Viewport mapping */ +      tmpOut->data[0][0] = x * scale[0] + trans[0]; +      tmpOut->data[0][1] = y * scale[1] + trans[1]; +      tmpOut->data[0][2] = z * scale[2] + trans[2]; +      tmpOut->data[0][3] = w; + +      /* Remaining attributes are packed into sequential post-transform +       * vertex attrib slots. 
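+       * (data[slot] for slot >= 1 simply mirrors machine->Outputs[slot]
+       * for this vertex, one xyzw vector per output.)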
+       */ +      for (slot = 1; slot < draw->num_vs_outputs; slot++) { +         tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; +         tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; +         tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; +         tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; +      } + +      mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); +   } /* loop over vertices */ +} + + +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] +    ALIGN16_ATTRIB; + + +void +spu_bind_vertex_shader(struct spu_vs_context *draw, +		       struct cell_shader_info *vs) +{ +   const unsigned immediate_addr = vs->immediates; +   const unsigned immediate_size =  +       ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) +		 + (immediate_addr & 0x0f)); +  + +   mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, +           TAG_VERTEX_BUFFER, 0, 0); + +   draw->machine.Instructions = (struct tgsi_full_instruction *) +       vs->instructions; +   draw->machine.NumInstructions = vs->num_instructions; + +   draw->machine.Declarations = (struct tgsi_full_declaration *) +       vs->declarations; +   draw->machine.NumDeclarations = vs->num_declarations; + +   draw->num_vs_outputs = vs->num_outputs; + +   /* specify the shader to interpret/execute */ +   spu_exec_machine_init(&draw->machine, +			 PIPE_MAX_SAMPLERS, +			 NULL /*samplers*/, +			 PIPE_SHADER_VERTEX); + +   wait_on_mask(1 << TAG_VERTEX_BUFFER); + +   (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], +                 sizeof(float) * 4 * vs->num_immediates); +} + + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, +                          const struct cell_command_vs *vs) +{ +   unsigned i; + +   (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); +   draw->nr_planes = vs->nr_planes; +   draw->vertex_fetch.nr_attrs = vs->nr_attrs; + +   for (i = 0; i < vs->num_elts; i += 4) { +      const unsigned batch_size = MIN2(vs->num_elts - i, 4); + +      run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); +   } +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h new file mode 100644 index 0000000000..4c74f5e74d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -0,0 +1,66 @@ +#ifndef SPU_VERTEX_SHADER_H +#define SPU_VERTEX_SHADER_H + +#include "cell/common.h" +#include "pipe/p_format.h" +#include "spu_exec.h" + +struct spu_vs_context; + +typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, +				     struct spu_exec_machine *machine, +				     const unsigned *elts, +				     unsigned count ); + +struct spu_vs_context { +   struct pipe_viewport_state viewport; + +   struct { +      uint64_t src_ptr[PIPE_MAX_ATTRIBS]; +      unsigned pitch[PIPE_MAX_ATTRIBS]; +      unsigned size[PIPE_MAX_ATTRIBS]; +      unsigned code_offset[PIPE_MAX_ATTRIBS]; +      unsigned nr_attrs; +      boolean dirty; + +      spu_full_fetch_func fetch_func; +      void *code; +   } vertex_fetch; +    +   /* Clip derived state: +    */ +   float plane[12][4]; +   unsigned nr_planes; + +   struct spu_exec_machine machine; +   const float (*constants)[4]; + +   unsigned num_vs_outputs; +}; + +extern void spu_update_vertex_fetch(struct spu_vs_context *draw); + +static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, +				    struct spu_exec_machine *machine, +				    const unsigned *elts, +				    
unsigned count) +{ +   if (draw->vertex_fetch.dirty) { +      spu_update_vertex_fetch(draw); +      draw->vertex_fetch.dirty = 0; +   } +    +   (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); +} + +struct cell_command_vs; + +extern void +spu_bind_vertex_shader(struct spu_vs_context *draw, +		       struct cell_shader_info *vs); + +extern void +spu_execute_vertex_shader(struct spu_vs_context *draw, +			  const struct cell_command_vs *vs); + +#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile new file mode 100644 index 0000000000..f08b8df07a --- /dev/null +++ b/src/gallium/drivers/failover/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = failover + +C_SOURCES = \ +	fo_state.c \ +	fo_state_emit.c \ +	fo_context.c  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript new file mode 100644 index 0000000000..f8e9b1b491 --- /dev/null +++ b/src/gallium/drivers/failover/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +failover = env.ConvenienceLibrary( +	target = 'failover', +	source = [ +		'fo_state.c', +		'fo_state_emit.c', +		'fo_context.c', +	]) + +Export('failover') diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c new file mode 100644 index 0000000000..0742b27b8f --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.c @@ -0,0 +1,159 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_memory.h" +#include "pipe/p_context.h" + +#include "fo_context.h" +#include "fo_winsys.h" + + + +static void failover_destroy( struct pipe_context *pipe ) +{ +   struct failover_context *failover = failover_context( pipe ); + +   free( failover ); +} + + + +static boolean failover_draw_elements( struct pipe_context *pipe, +				       struct pipe_buffer *indexBuffer, +				       unsigned indexSize, +				       unsigned prim, unsigned start, unsigned count) +{ +   struct failover_context *failover = failover_context( pipe ); + +   /* If there has been any statechange since last time, try hardware +    * rendering again: +    */ +   if (failover->dirty) { +      failover->mode = FO_HW; +   } + +   /* Try hardware: +    */ +   if (failover->mode == FO_HW) { +      if (!failover->hw->draw_elements( failover->hw,  +					indexBuffer,  +					indexSize,  +					prim,  +					start,  +					count )) { + +	 failover->hw->flush( failover->hw, ~0, NULL ); +	 failover->mode = FO_SW; +      } +   } + +   /* Possibly try software: +    */ +   if (failover->mode == FO_SW) { + +      if (failover->dirty)  +	 failover_state_emit( failover ); + +      failover->sw->draw_elements( failover->sw,  +				   indexBuffer,  +				   indexSize,  +				   prim,  +				   start,  +				   count ); + +      /* Be ready to switch back to hardware rendering without an +       * intervening flush.  Unlikely to be much performance impact to +       * this: +       */ +      failover->sw->flush( failover->sw, ~0, NULL ); +   } + +   return TRUE; +} + + +static boolean failover_draw_arrays( struct pipe_context *pipe, +				     unsigned prim, unsigned start, unsigned count) +{ +   return failover_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *failover_create( struct pipe_context *hw, +				      struct pipe_context *sw ) +{ +   struct failover_context *failover = CALLOC_STRUCT(failover_context); +   if (failover == NULL) +      return NULL; + +   failover->hw = hw; +   failover->sw = sw; +   failover->pipe.winsys = hw->winsys; +   failover->pipe.screen = hw->screen; +   failover->pipe.destroy = failover_destroy; +#if 0 +   failover->pipe.is_format_supported = hw->is_format_supported; +   failover->pipe.get_name = hw->get_name; +   failover->pipe.get_vendor = hw->get_vendor; +   failover->pipe.get_param = hw->get_param; +   failover->pipe.get_paramf = hw->get_paramf; +#endif + +   failover->pipe.draw_arrays = failover_draw_arrays; +   failover->pipe.draw_elements = failover_draw_elements; +   failover->pipe.clear = hw->clear; + +   /* No software occlusion fallback (or other optional functionality) +    * at this point - if the hardware doesn't support it, don't +    * advertise it to the application. 
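+    * (Queries, surface copy/fill and flush are therefore wired straight
+    * to the hardware context below.)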
+    */ +   failover->pipe.begin_query = hw->begin_query; +   failover->pipe.end_query = hw->end_query; + +   failover_init_state_functions( failover ); + +   failover->pipe.surface_copy = hw->surface_copy; +   failover->pipe.surface_fill = hw->surface_fill; + +#if 0 +   failover->pipe.texture_create = hw->texture_create; +   failover->pipe.texture_release = hw->texture_release; +   failover->pipe.get_tex_surface = hw->get_tex_surface; +   failover->pipe.texture_update = hw->texture_update; +#endif + +   failover->pipe.flush = hw->flush; + +   failover->dirty = 0; + +   return &failover->pipe; +} + diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h new file mode 100644 index 0000000000..9ba86ba866 --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.h @@ -0,0 +1,125 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef FO_CONTEXT_H +#define FO_CONTEXT_H + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + + + +#define FO_NEW_VIEWPORT        0x1 +#define FO_NEW_RASTERIZER      0x2 +#define FO_NEW_FRAGMENT_SHADER 0x4 +#define FO_NEW_BLEND           0x8 +#define FO_NEW_CLIP            0x10 +#define FO_NEW_SCISSOR         0x20 +#define FO_NEW_STIPPLE         0x40 +#define FO_NEW_FRAMEBUFFER     0x80 +#define FO_NEW_ALPHA_TEST      0x100 +#define FO_NEW_DEPTH_STENCIL   0x200 +#define FO_NEW_SAMPLER         0x400 +#define FO_NEW_TEXTURE         0x800 +#define FO_NEW_VERTEX          0x2000 +#define FO_NEW_VERTEX_SHADER   0x4000 +#define FO_NEW_BLEND_COLOR     0x8000 +#define FO_NEW_CLEAR_COLOR     0x10000 +#define FO_NEW_VERTEX_BUFFER   0x20000 +#define FO_NEW_VERTEX_ELEMENT  0x40000 + + + +#define FO_HW 0 +#define FO_SW 1 + +struct fo_state { +   void *sw_state; +   void *hw_state; +}; +struct failover_context { +   struct pipe_context pipe;  /**< base class */ + + +   /* The most recent drawing state as set by the driver: +    */ +   const struct fo_state     *blend; +   const struct fo_state     *sampler[PIPE_MAX_SAMPLERS]; +   const struct fo_state     *depth_stencil; +   const struct fo_state     *rasterizer; +   const struct fo_state     *fragment_shader; +   const struct fo_state     *vertex_shader; + +   struct pipe_blend_color blend_color; +   struct pipe_clip_state clip; +   struct pipe_framebuffer_state framebuffer; +   struct pipe_poly_stipple poly_stipple; +   struct pipe_scissor_state scissor; +   struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; +   struct pipe_viewport_state viewport; +   struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; +   struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; + +   uint num_vertex_buffers; +   uint num_vertex_elements; + +   void *sw_sampler_state[PIPE_MAX_SAMPLERS]; +   void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + +   unsigned dirty; + +   unsigned num_samplers; +   unsigned num_textures; + +   unsigned mode; +   struct pipe_context *hw; +   struct pipe_context *sw; +}; + + + +void failover_init_state_functions( struct failover_context *failover ); +void failover_state_emit( struct failover_context *failover ); + +static INLINE struct failover_context * +failover_context( struct pipe_context *pipe ) +{ +   return (struct failover_context *)pipe; +} + +/* Internal functions + */ +void +failover_set_constant_buffer(struct pipe_context *pipe, +                             uint shader, uint index, +                             const struct pipe_constant_buffer *buf); + + +#endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c new file mode 100644 index 0000000000..6a79706632 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state.c @@ -0,0 +1,483 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_inlines.h" + +#include "fo_context.h" + + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date.   + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + *     not the whole object. + * - By adding a callback in the state tracker to re-emit state.  The + *     state tracker knows the current state already and can re-emit it  + *     without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. 
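+ *
+ * The per-CSO pattern used throughout this file is:
+ *
+ *    create_X:  build a { sw_state, hw_state } pair wrapped in fo_state
+ *    bind_X:    save the wrapper, set an FO_NEW_* dirty bit, bind both
+ *    delete_X:  delete both halves, then free the wrapper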
+ */ + + + +static void * +failover_create_blend_state( struct pipe_context *pipe, +                             const struct pipe_blend_state *blend ) +{ +   struct fo_state *state = malloc(sizeof(struct fo_state)); +   struct failover_context *failover = failover_context(pipe); + +   state->sw_state = failover->sw->create_blend_state(failover->sw, blend); +   state->hw_state = failover->hw->create_blend_state(failover->hw, blend); + +   return state; +} + +static void +failover_bind_blend_state( struct pipe_context *pipe, +                           void *blend ) +{ +   struct failover_context *failover = failover_context(pipe); +   struct fo_state *state = (struct fo_state *)blend; +   failover->blend = state; +   failover->dirty |= FO_NEW_BLEND; +   failover->sw->bind_blend_state( failover->sw, state->sw_state ); +   failover->hw->bind_blend_state( failover->hw, state->hw_state ); +} + +static void +failover_delete_blend_state( struct pipe_context *pipe, +                             void *blend ) +{ +   struct fo_state *state = (struct fo_state*)blend; +   struct failover_context *failover = failover_context(pipe); + +   failover->sw->delete_blend_state(failover->sw, state->sw_state); +   failover->hw->delete_blend_state(failover->hw, state->hw_state); +   state->sw_state = 0; +   state->hw_state = 0; +   free(state); +} + +static void +failover_set_blend_color( struct pipe_context *pipe, +			  const struct pipe_blend_color *blend_color ) +{ +   struct failover_context *failover = failover_context(pipe); + +   failover->blend_color = *blend_color; +   failover->dirty |= FO_NEW_BLEND_COLOR; +   failover->sw->set_blend_color( failover->sw, blend_color ); +   failover->hw->set_blend_color( failover->hw, blend_color ); +} + +static void  +failover_set_clip_state( struct pipe_context *pipe, +			 const struct pipe_clip_state *clip ) +{ +   struct failover_context *failover = failover_context(pipe); + +   failover->clip = *clip; +   failover->dirty |= FO_NEW_CLIP; +   failover->sw->set_clip_state( failover->sw, clip ); +   failover->hw->set_clip_state( failover->hw, clip ); +} + + +static void * +failover_create_depth_stencil_state(struct pipe_context *pipe, +                              const struct pipe_depth_stencil_alpha_state *templ) +{ +   struct fo_state *state = malloc(sizeof(struct fo_state)); +   struct failover_context *failover = failover_context(pipe); + +   state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); +   state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ); + +   return state; +} + +static void +failover_bind_depth_stencil_state(struct pipe_context *pipe, +                                  void *depth_stencil) +{ +   struct failover_context *failover = failover_context(pipe); +   struct fo_state *state = (struct fo_state *)depth_stencil; +   failover->depth_stencil = state; +   failover->dirty |= FO_NEW_DEPTH_STENCIL; +   failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state); +   failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state); +} + +static void +failover_delete_depth_stencil_state(struct pipe_context *pipe, +                                    void *ds) +{ +   struct fo_state *state = (struct fo_state*)ds; +   struct failover_context *failover = failover_context(pipe); + +   failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state); +   failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); +   
state->sw_state = 0; +   state->hw_state = 0; +   free(state); +} + +static void +failover_set_framebuffer_state(struct pipe_context *pipe, +			       const struct pipe_framebuffer_state *framebuffer) +{ +   struct failover_context *failover = failover_context(pipe); + +   failover->framebuffer = *framebuffer; +   failover->dirty |= FO_NEW_FRAMEBUFFER; +   failover->sw->set_framebuffer_state( failover->sw, framebuffer ); +   failover->hw->set_framebuffer_state( failover->hw, framebuffer ); +} + + +static void * +failover_create_fs_state(struct pipe_context *pipe, +                         const struct pipe_shader_state *templ) +{ +   struct fo_state *state = malloc(sizeof(struct fo_state)); +   struct failover_context *failover = failover_context(pipe); + +   state->sw_state = failover->sw->create_fs_state(failover->sw, templ); +   state->hw_state = failover->hw->create_fs_state(failover->hw, templ); + +   return state; +} + +static void +failover_bind_fs_state(struct pipe_context *pipe, void *fs) +{ +   struct failover_context *failover = failover_context(pipe); +   struct fo_state *state = (struct fo_state*)fs; +   failover->fragment_shader = state; +   failover->dirty |= FO_NEW_FRAGMENT_SHADER; +   failover->sw->bind_fs_state(failover->sw, state->sw_state); +   failover->hw->bind_fs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_fs_state(struct pipe_context *pipe, +                         void *fs) +{ +   struct fo_state *state = (struct fo_state*)fs; +   struct failover_context *failover = failover_context(pipe); + +   failover->sw->delete_fs_state(failover->sw, state->sw_state); +   failover->hw->delete_fs_state(failover->hw, state->hw_state); +   state->sw_state = 0; +   state->hw_state = 0; +   free(state); +} + +static void * +failover_create_vs_state(struct pipe_context *pipe, +                         const struct pipe_shader_state *templ) +{ +   struct fo_state *state = malloc(sizeof(struct fo_state)); +   struct failover_context *failover = failover_context(pipe); + +   state->sw_state = failover->sw->create_vs_state(failover->sw, templ); +   state->hw_state = failover->hw->create_vs_state(failover->hw, templ); + +   return state; +} + +static void +failover_bind_vs_state(struct pipe_context *pipe, +                       void *vs) +{ +   struct failover_context *failover = failover_context(pipe); + +   struct fo_state *state = (struct fo_state*)vs; +   failover->vertex_shader = state; +   failover->dirty |= FO_NEW_VERTEX_SHADER; +   failover->sw->bind_vs_state(failover->sw, state->sw_state); +   failover->hw->bind_vs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_vs_state(struct pipe_context *pipe, +                         void *vs) +{ +   struct fo_state *state = (struct fo_state*)vs; +   struct failover_context *failover = failover_context(pipe); + +   failover->sw->delete_vs_state(failover->sw, state->sw_state); +   failover->hw->delete_vs_state(failover->hw, state->hw_state); +   state->sw_state = 0; +   state->hw_state = 0; +   free(state); +} + +static void  +failover_set_polygon_stipple( struct pipe_context *pipe, +			      const struct pipe_poly_stipple *stipple ) +{ +   struct failover_context *failover = failover_context(pipe); + +   failover->poly_stipple = *stipple; +   failover->dirty |= FO_NEW_STIPPLE; +   failover->sw->set_polygon_stipple( failover->sw, stipple ); +   failover->hw->set_polygon_stipple( failover->hw, stipple ); +} + + +static void * +failover_create_rasterizer_state(struct pipe_context *pipe, 
+                                 const struct pipe_rasterizer_state *templ) +{ +   struct fo_state *state = malloc(sizeof(struct fo_state)); +   struct failover_context *failover = failover_context(pipe); + +   state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); +   state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ); + +   return state; +} + +static void +failover_bind_rasterizer_state(struct pipe_context *pipe, +                               void *raster) +{ +   struct failover_context *failover = failover_context(pipe); + +   struct fo_state *state = (struct fo_state*)raster; +   failover->rasterizer = state; +   failover->dirty |= FO_NEW_RASTERIZER; +   failover->sw->bind_rasterizer_state(failover->sw, state->sw_state); +   failover->hw->bind_rasterizer_state(failover->hw, state->hw_state); +} + +static void +failover_delete_rasterizer_state(struct pipe_context *pipe, +                                 void *raster) +{ +   struct fo_state *state = (struct fo_state*)raster; +   struct failover_context *failover = failover_context(pipe); + +   failover->sw->delete_rasterizer_state(failover->sw, state->sw_state); +   failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); +   state->sw_state = 0; +   state->hw_state = 0; +   free(state); +} + + +static void  +failover_set_scissor_state( struct pipe_context *pipe, +                                 const struct pipe_scissor_state *scissor ) +{ +   struct failover_context *failover = failover_context(pipe); + +   failover->scissor = *scissor; +   failover->dirty |= FO_NEW_SCISSOR; +   failover->sw->set_scissor_state( failover->sw, scissor ); +   failover->hw->set_scissor_state( failover->hw, scissor ); +} + + +static void * +failover_create_sampler_state(struct pipe_context *pipe, +                              const struct pipe_sampler_state *templ) +{ +   struct fo_state *state = malloc(sizeof(struct fo_state)); +   struct failover_context *failover = failover_context(pipe); + +   state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); +   state->hw_state = failover->hw->create_sampler_state(failover->hw, templ); + +   return state; +} + +static void +failover_bind_sampler_states(struct pipe_context *pipe, +                             unsigned num, void **sampler) +{ +   struct failover_context *failover = failover_context(pipe); +   struct fo_state *state = (struct fo_state*)sampler; +   uint i; +   assert(num <= PIPE_MAX_SAMPLERS); +   /* Check for no-op */ +   if (num == failover->num_samplers && +       !memcmp(failover->sampler, sampler, num * sizeof(void *))) +      return; +   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { +      failover->sw_sampler_state[i] = i < num ? state[i].sw_state : NULL; +      failover->hw_sampler_state[i] = i < num ? 
state[i].hw_state : NULL; +   } +   failover->dirty |= FO_NEW_SAMPLER; +   failover->num_samplers = num; +   failover->sw->bind_sampler_states(failover->sw, num, +                                     failover->sw_sampler_state); +   failover->hw->bind_sampler_states(failover->hw, num, +                                     failover->hw_sampler_state); +} + +static void +failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) +{ +   struct fo_state *state = (struct fo_state*)sampler; +   struct failover_context *failover = failover_context(pipe); + +   failover->sw->delete_sampler_state(failover->sw, state->sw_state); +   failover->hw->delete_sampler_state(failover->hw, state->hw_state); +   state->sw_state = 0; +   state->hw_state = 0; +   free(state); +} + + +static void +failover_set_sampler_textures(struct pipe_context *pipe, +                              unsigned num, +                              struct pipe_texture **texture) +{ +   struct failover_context *failover = failover_context(pipe); +   uint i; + +   assert(num <= PIPE_MAX_SAMPLERS); + +   /* Check for no-op */ +   if (num == failover->num_textures && +       !memcmp(failover->texture, texture, num * sizeof(struct pipe_texture *))) +      return; +   for (i = 0; i < num; i++) +      pipe_texture_reference((struct pipe_texture **) &failover->texture[i], +                             texture[i]); +   for (i = num; i < failover->num_textures; i++) +      pipe_texture_reference((struct pipe_texture **) &failover->texture[i], +                             NULL); +   failover->dirty |= FO_NEW_TEXTURE; +   failover->num_textures = num; +   failover->sw->set_sampler_textures( failover->sw, num, texture ); +   failover->hw->set_sampler_textures( failover->hw, num, texture ); +} + + +static void  +failover_set_viewport_state( struct pipe_context *pipe, +			     const struct pipe_viewport_state *viewport ) +{ +   struct failover_context *failover = failover_context(pipe); + +   failover->viewport = *viewport;  +   failover->dirty |= FO_NEW_VIEWPORT; +   failover->sw->set_viewport_state( failover->sw, viewport ); +   failover->hw->set_viewport_state( failover->hw, viewport ); +} + + +static void +failover_set_vertex_buffers(struct pipe_context *pipe, +                            unsigned count, +                            const struct pipe_vertex_buffer *vertex_buffers) +{ +   struct failover_context *failover = failover_context(pipe); + +   memcpy(failover->vertex_buffers, vertex_buffers, +          count * sizeof(vertex_buffers[0])); +   failover->dirty |= FO_NEW_VERTEX_BUFFER; +   failover->num_vertex_buffers = count; +   failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers ); +   failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers ); +} + + +static void +failover_set_vertex_elements(struct pipe_context *pipe, +                             unsigned count, +                             const struct pipe_vertex_element *vertex_elements) +{ +   struct failover_context *failover = failover_context(pipe); + +   memcpy(failover->vertex_elements, vertex_elements, +          count * sizeof(vertex_elements[0])); + +   failover->dirty |= FO_NEW_VERTEX_ELEMENT; +   failover->num_vertex_elements = count; +   failover->sw->set_vertex_elements( failover->sw, count, vertex_elements ); +   failover->hw->set_vertex_elements( failover->hw, count, vertex_elements ); +} + +void +failover_set_constant_buffer(struct pipe_context *pipe, +                             uint shader, uint index, +             
                const struct pipe_constant_buffer *buf) +{ +   struct failover_context *failover = failover_context(pipe); + +   assert(shader < PIPE_SHADER_TYPES); +   assert(index == 0); + +   failover->sw->set_constant_buffer(failover->sw, shader, index, buf); +   failover->hw->set_constant_buffer(failover->hw, shader, index, buf); +} + + +void +failover_init_state_functions( struct failover_context *failover ) +{ +   failover->pipe.create_blend_state = failover_create_blend_state; +   failover->pipe.bind_blend_state   = failover_bind_blend_state; +   failover->pipe.delete_blend_state = failover_delete_blend_state; +   failover->pipe.create_sampler_state = failover_create_sampler_state; +   failover->pipe.bind_sampler_states  = failover_bind_sampler_states; +   failover->pipe.delete_sampler_state = failover_delete_sampler_state; +   failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; +   failover->pipe.bind_depth_stencil_alpha_state   = failover_bind_depth_stencil_state; +   failover->pipe.delete_depth_stencil_alpha_state = failover_delete_depth_stencil_state; +   failover->pipe.create_rasterizer_state = failover_create_rasterizer_state; +   failover->pipe.bind_rasterizer_state = failover_bind_rasterizer_state; +   failover->pipe.delete_rasterizer_state = failover_delete_rasterizer_state; +   failover->pipe.create_fs_state = failover_create_fs_state; +   failover->pipe.bind_fs_state   = failover_bind_fs_state; +   failover->pipe.delete_fs_state = failover_delete_fs_state; +   failover->pipe.create_vs_state = failover_create_vs_state; +   failover->pipe.bind_vs_state   = failover_bind_vs_state; +   failover->pipe.delete_vs_state = failover_delete_vs_state; + +   failover->pipe.set_blend_color = failover_set_blend_color; +   failover->pipe.set_clip_state = failover_set_clip_state; +   failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; +   failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; +   failover->pipe.set_scissor_state = failover_set_scissor_state; +   failover->pipe.set_sampler_textures = failover_set_sampler_textures; +   failover->pipe.set_viewport_state = failover_set_viewport_state; +   failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; +   failover->pipe.set_vertex_elements = failover_set_vertex_elements; +   failover->pipe.set_constant_buffer = failover_set_constant_buffer; +} diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c new file mode 100644 index 0000000000..bd4fce9d20 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -0,0 +1,117 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "fo_context.h" + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date.   + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + *     not the whole object. + * - By adding a callback in the state tracker to re-emit state.  The + *     state tracker knows the current state already and can re-emit it  + *     without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + +/* Bring the software pipe uptodate with current state. + *  + * With constant state objects we would probably just send all state + * to both rasterizers all the time??? + */ +void +failover_state_emit( struct failover_context *failover ) +{ +   if (failover->dirty & FO_NEW_BLEND) +      failover->sw->bind_blend_state( failover->sw, +                                      failover->blend->sw_state ); + +   if (failover->dirty & FO_NEW_BLEND_COLOR) +      failover->sw->set_blend_color( failover->sw, &failover->blend_color ); + +   if (failover->dirty & FO_NEW_CLIP) +      failover->sw->set_clip_state( failover->sw, &failover->clip ); + +   if (failover->dirty & FO_NEW_DEPTH_STENCIL) +      failover->sw->bind_depth_stencil_alpha_state( failover->sw, +						    failover->depth_stencil->sw_state ); + +   if (failover->dirty & FO_NEW_FRAMEBUFFER) +      failover->sw->set_framebuffer_state( failover->sw, &failover->framebuffer ); + +   if (failover->dirty & FO_NEW_FRAGMENT_SHADER) +      failover->sw->bind_fs_state( failover->sw, +                                   failover->fragment_shader->sw_state ); + +   if (failover->dirty & FO_NEW_VERTEX_SHADER) +      failover->sw->bind_vs_state( failover->sw, +                                   failover->vertex_shader->sw_state ); + +   if (failover->dirty & FO_NEW_STIPPLE) +      failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple ); + +   if (failover->dirty & FO_NEW_RASTERIZER) +      failover->sw->bind_rasterizer_state( failover->sw, +                                           failover->rasterizer->sw_state ); + +   if (failover->dirty & FO_NEW_SCISSOR) +      failover->sw->set_scissor_state( failover->sw, &failover->scissor ); + +   if (failover->dirty & FO_NEW_VIEWPORT) +      failover->sw->set_viewport_state( failover->sw, &failover->viewport ); + +   if (failover->dirty & FO_NEW_SAMPLER) { +      failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, +                                         failover->sw_sampler_state ); +   } + +   if (failover->dirty & FO_NEW_TEXTURE) { +      failover->sw->set_sampler_textures( failover->sw, failover->num_textures,  +                                          failover->texture ); +   } + +   if (failover->dirty & FO_NEW_VERTEX_BUFFER) { +      failover->sw->set_vertex_buffers( 
failover->sw, +                                        failover->num_vertex_buffers, +                                        failover->vertex_buffers ); +   } + +   if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { +      failover->sw->set_vertex_elements( failover->sw, +                                         failover->num_vertex_elements, +                                         failover->vertex_elements ); +   } + +   failover->dirty = 0; +} diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h new file mode 100644 index 0000000000..a8ce997a1f --- /dev/null +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -0,0 +1,45 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef FO_WINSYS_H +#define FO_WINSYS_H + + +/* This is the interface that failover requires any window system + * hosting it to implement.  This is the only include file in failover + * which is public. + */ + + +struct pipe_context; + + +struct pipe_context *failover_create( struct pipe_context *hw, +				      struct pipe_context *sw ); + + +#endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile new file mode 100644 index 0000000000..41a61a0020 --- /dev/null +++ b/src/gallium/drivers/i915simple/Makefile @@ -0,0 +1,31 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current
+
+LIBNAME = i915simple
+
+C_SOURCES = \
+	i915_blit.c \
+	i915_clear.c \
+	i915_flush.c \
+	i915_context.c \
+	i915_debug.c \
+	i915_debug_fp.c \
+	i915_state.c \
+	i915_state_immediate.c \
+	i915_state_dynamic.c \
+	i915_state_derived.c \
+	i915_state_emit.c \
+	i915_state_sampler.c \
+	i915_screen.c \
+	i915_prim_emit.c \
+	i915_prim_vbuf.c \
+	i915_texture.c \
+	i915_fpc_emit.c \
+	i915_fpc_translate.c \
+	i915_surface.c 
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript
new file mode 100644
index 0000000000..2366e1247f
--- /dev/null
+++ b/src/gallium/drivers/i915simple/SConscript
@@ -0,0 +1,29 @@
+Import('*')
+
+env = env.Clone()
+
+i915simple = env.ConvenienceLibrary(
+	target = 'i915simple',
+	source = [
+		'i915_blit.c',
+		'i915_clear.c',
+		'i915_context.c',
+		'i915_debug.c',
+		'i915_debug_fp.c',
+		'i915_flush.c',
+		'i915_fpc_emit.c',
+		'i915_fpc_translate.c',
+		'i915_prim_emit.c',
+		'i915_prim_vbuf.c',
+		'i915_screen.c',
+		'i915_state.c',
+		'i915_state_derived.c',
+		'i915_state_dynamic.c',
+		'i915_state_emit.c',
+		'i915_state_immediate.c',
+		'i915_state_sampler.c',
+		'i915_surface.c',
+		'i915_texture.c',
+	])
+
+Export('i915simple')
diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h
new file mode 100644
index 0000000000..a433cf054d
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_batch.h
@@ -0,0 +1,116 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef I915_BATCH_H
+#define I915_BATCH_H
+
+#include "i915_winsys.h"
+
+struct i915_batchbuffer
+{
+   struct pipe_buffer *buffer;
+   struct i915_winsys *winsys;
+
+   unsigned char *map;
+   unsigned char *ptr;
+
+   size_t size;
+   size_t actual_size;
+
+   size_t relocs;
+   size_t max_relocs;
+};
+
+static INLINE boolean
+i915_batchbuffer_check( struct i915_batchbuffer *batch,
+			size_t dwords,
+			size_t relocs )
+{
+   /** TODO JB: Check relocs */
+   return dwords * 4 <= batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE size_t
+i915_batchbuffer_space( struct i915_batchbuffer *batch )
+{
+   return batch->size - (batch->ptr - batch->map);
+}
+
+static INLINE void
+i915_batchbuffer_dword( struct i915_batchbuffer *batch,
+			unsigned dword )
+{
+   if (i915_batchbuffer_space(batch) < 4)
+      return;
+
+   *(unsigned *)batch->ptr = dword;
+   batch->ptr += 4;
+}
+
+static INLINE void
+i915_batchbuffer_write( struct i915_batchbuffer *batch,
+			void *data,
+			size_t size )
+{
+   if (i915_batchbuffer_space(batch) < size)
+      return;
+
+   /* append the caller's data to the batch */
+   memcpy(batch->ptr, data, size);
+   batch->ptr += size;
+}
+
+static INLINE void
+i915_batchbuffer_reloc( struct i915_batchbuffer *batch,
+			struct pipe_buffer *buffer,
+			size_t flags,
+			size_t offset )
+{
+   batch->winsys->batch_reloc( batch->winsys, buffer, flags, offset );
+}
+
+static INLINE void
+i915_batchbuffer_flush( struct i915_batchbuffer *batch,
+			struct pipe_fence_handle **fence )
+{
+   batch->winsys->batch_flush( batch->winsys, fence );
+}
+
+#define BEGIN_BATCH( dwords, relocs ) \
+   (i915_batchbuffer_check( i915->batch, dwords, relocs ))
+
+#define OUT_BATCH( dword ) \
+   i915_batchbuffer_dword( i915->batch, dword )
+
+#define OUT_RELOC( buf, flags, delta ) \
+   i915_batchbuffer_reloc( i915->batch, buf, flags, delta )
+
+#define FLUSH_BATCH(fence) do {				\
+   i915->winsys->batch_flush( i915->winsys, fence );	\
+   i915->hardware_dirty = ~0;				\
+} while (0)
+
+#endif
diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c
new file mode 100644
index 0000000000..448a4708ce
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_blit.c
@@ -0,0 +1,159 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_blit.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_debug.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void +i915_fill_blit(struct i915_context *i915, +	       unsigned cpp, +	       unsigned short dst_pitch, +	       struct pipe_buffer *dst_buffer, +	       unsigned dst_offset, +	       short x, short y,  +	       short w, short h,  +	       unsigned color) +{ +   unsigned BR13, CMD; + + +   I915_DBG(i915, +       "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", +       __FUNCTION__, +       dst_buffer, dst_pitch, dst_offset, x, y, w, h); + +   switch (cpp) { +   case 1: +   case 2: +   case 3: +      BR13 = (((int) dst_pitch) & 0xffff) | +	 (0xF0 << 16) | (1 << 24); +      CMD = XY_COLOR_BLT_CMD; +      break; +   case 4: +      BR13 = (((int) dst_pitch) & 0xffff) | +	 (0xF0 << 16) | (1 << 24) | (1 << 25); +      CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | +             XY_COLOR_BLT_WRITE_RGB); +      break; +   default: +      return; +   } + +   if (!BEGIN_BATCH(6, 1)) { +      FLUSH_BATCH(NULL); +      assert(BEGIN_BATCH(6, 1)); +   } +   OUT_BATCH(CMD); +   OUT_BATCH(BR13); +   OUT_BATCH((y << 16) | x); +   OUT_BATCH(((y + h) << 16) | (x + w)); +   OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); +   OUT_BATCH(color); +   FLUSH_BATCH(NULL); +} + + +void +i915_copy_blit( struct i915_context *i915, +                  unsigned do_flip, +                  unsigned cpp, +                  unsigned short src_pitch, +                  struct pipe_buffer *src_buffer, +                  unsigned src_offset, +                  unsigned short dst_pitch, +                  struct pipe_buffer *dst_buffer, +                  unsigned dst_offset, +                  short src_x, short src_y, +                  short dst_x, short dst_y,  +		  short w, short h ) +{ +   unsigned CMD, BR13; +   int dst_y2 = dst_y + h; +   int dst_x2 = dst_x + w; + + +   I915_DBG(i915, +       "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", +       __FUNCTION__, +       src_buffer, src_pitch, src_offset, src_x, src_y, +       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + +   switch (cpp) { +   case 1: +   case 2: +   case 3: +      BR13 = (((int) dst_pitch) & 0xffff) | +	 (0xCC << 16) | (1 << 24); +      CMD = XY_SRC_COPY_BLT_CMD; +      break; +   case 4: +      BR13 = (((int) dst_pitch) & 0xffff) | +	 (0xCC << 16) | (1 << 24) | (1 << 25); +      CMD = +         (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | +          XY_SRC_COPY_BLT_WRITE_RGB); +      break; +   default: +      return; +   } + +   if (dst_y2 < dst_y ||  +       dst_x2 < dst_x) { +      return; +   } + +   /* Hardware can handle negative pitches but loses the ability to do +    * proper overlapping blits in that case.  We don't really have a +    * need for either at this stage. 
+    */ +   assert (dst_pitch > 0 && src_pitch > 0); + + +   if (!BEGIN_BATCH(8, 2)) { +      FLUSH_BATCH(NULL); +      assert(BEGIN_BATCH(8, 2)); +   } +   OUT_BATCH(CMD); +   OUT_BATCH(BR13); +   OUT_BATCH((dst_y << 16) | dst_x); +   OUT_BATCH((dst_y2 << 16) | dst_x2); +   OUT_RELOC(dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); +   OUT_BATCH((src_y << 16) | src_x); +   OUT_BATCH(((int) src_pitch & 0xffff)); +   OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); +   FLUSH_BATCH(NULL); +} + + diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h new file mode 100644 index 0000000000..0bb3453861 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.h @@ -0,0 +1,55 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef I915_BLIT_H +#define I915_BLIT_H + +#include "i915_context.h" + +extern void i915_copy_blit(struct i915_context *i915, +                           unsigned do_flip, +			   unsigned cpp, +			   unsigned short src_pitch, +			   struct pipe_buffer *src_buffer, +			   unsigned src_offset, +			   unsigned short dst_pitch, +			   struct pipe_buffer *dst_buffer, +			   unsigned dst_offset, +			   short srcx, short srcy, +			   short dstx, short dsty, +			   short w, short h ); + +extern void i915_fill_blit(struct i915_context *i915, +			   unsigned cpp, +			   unsigned short dst_pitch, +			   struct pipe_buffer *dst_buffer, +			   unsigned dst_offset, +			   short x, short y, +			   short w, short h, unsigned color); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c new file mode 100644 index 0000000000..8a2d3ca43f --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_clear.c @@ -0,0 +1,48 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Author: + *    Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "i915_context.h" +#include "i915_state.h" + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + */ +void +i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +   pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +   ps->status = PIPE_SURFACE_STATUS_DEFINED; +} diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c new file mode 100644 index 0000000000..3e3a596884 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -0,0 +1,193 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_state.h" +#include "i915_batch.h" +#include "i915_texture.h" +#include "i915_reg.h" + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "pipe/p_screen.h" + + +static void i915_destroy( struct pipe_context *pipe ) +{ +   struct i915_context *i915 = i915_context( pipe ); + +   draw_destroy( i915->draw ); +    +   if(i915->winsys->destroy) +      i915->winsys->destroy(i915->winsys); + +   FREE( i915 ); +} + + +static boolean +i915_draw_range_elements(struct pipe_context *pipe, +			     struct pipe_buffer *indexBuffer, +			     unsigned indexSize, +			     unsigned min_index, +			     unsigned max_index, +			     unsigned prim, unsigned start, unsigned count) +{ +   struct i915_context *i915 = i915_context( pipe ); +   struct draw_context *draw = i915->draw; +   unsigned i; + +   if (i915->dirty) +      i915_update_derived( i915 ); + +   /* +    * Map vertex buffers +    */ +   for (i = 0; i < i915->num_vertex_buffers; i++) { +      void *buf +         = pipe_buffer_map(pipe->screen, +                                    i915->vertex_buffer[i].buffer, +                                    PIPE_BUFFER_USAGE_CPU_READ); +      draw_set_mapped_vertex_buffer(draw, i, buf); +   } +   /* Map index buffer, if present */ +   if (indexBuffer) { +      void *mapped_indexes +         = pipe_buffer_map(pipe->screen, indexBuffer, +                                    PIPE_BUFFER_USAGE_CPU_READ); +      draw_set_mapped_element_buffer_range(draw, indexSize, +					   min_index, +					   max_index, +					   mapped_indexes); +   } +   else { +      /* no index/element buffer */ +      draw_set_mapped_element_buffer(draw, 0, NULL); +   } + + +   draw_set_mapped_constant_buffer(draw, +                                   i915->current.constants[PIPE_SHADER_VERTEX], +                                   ( i915->current.num_user_constants[PIPE_SHADER_VERTEX] *  +                                     4 * sizeof(float) )); + +   /* draw! 
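+    * At this point the vertex buffers and any index buffer have been
+    * mapped and, together with the current vertex-shader constants,
+    * handed to the draw module; draw_arrays() below runs draw's
+    * vertex pipeline over them before the buffers are unmapped again.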
*/ +   draw_arrays(i915->draw, prim, start, count); + +   /* +    * unmap vertex/index buffers +    */ +   for (i = 0; i < i915->num_vertex_buffers; i++) { +      pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer); +      draw_set_mapped_vertex_buffer(draw, i, NULL); +   } +   if (indexBuffer) { +      pipe_buffer_unmap(pipe->screen, indexBuffer); +      draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); +   } + +   return TRUE; +} + +static boolean +i915_draw_elements( struct pipe_context *pipe, +                    struct pipe_buffer *indexBuffer, +                    unsigned indexSize, +                    unsigned prim, unsigned start, unsigned count) +{ +   return i915_draw_range_elements( pipe, indexBuffer, +					indexSize, +					0, 0xffffffff, +					prim, start, count ); +} + +static boolean i915_draw_arrays( struct pipe_context *pipe, +				 unsigned prim, unsigned start, unsigned count) +{ +   return i915_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *i915_create_context( struct pipe_screen *screen, +                                          struct pipe_winsys *pipe_winsys, +                                          struct i915_winsys *i915_winsys ) +{ +   struct i915_context *i915; + +   i915 = CALLOC_STRUCT(i915_context); +   if (i915 == NULL) +      return NULL; + +   i915->winsys = i915_winsys; +   i915->pipe.winsys = pipe_winsys; +   i915->pipe.screen = screen; + +   i915->pipe.destroy = i915_destroy; + +   i915->pipe.clear = i915_clear; + + +   i915->pipe.draw_arrays = i915_draw_arrays; +   i915->pipe.draw_elements = i915_draw_elements; +   i915->pipe.draw_range_elements = i915_draw_range_elements; + +   /* +    * Create drawing context and plug our rendering stage into it. +    */ +   i915->draw = draw_create(); +   assert(i915->draw); +   if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { +      draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); +   } +   else { +      draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); +   } + +   i915_init_surface_functions(i915); +   i915_init_state_functions(i915); +   i915_init_flush_functions(i915); +   i915_init_texture_functions(i915); + +   draw_install_aaline_stage(i915->draw, &i915->pipe); +   draw_install_aapoint_stage(i915->draw, &i915->pipe); + +   i915->dirty = ~0; +   i915->hardware_dirty = ~0; + +   /* Batch stream debugging is a bit hacked up at the moment: +    */ +   i915->batch = i915_winsys->batch_get(i915_winsys); +   i915->batch->winsys = i915_winsys; + +   return &i915->pipe; +} + diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h new file mode 100644 index 0000000000..3cdabe45f9 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -0,0 +1,345 @@ + /************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "tgsi/tgsi_scan.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4       0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2  +#define I915_DYNAMIC_IAB          3 +#define I915_DYNAMIC_BC_0         4 /* just the header */ +#define I915_DYNAMIC_BC_1         5 +#define I915_DYNAMIC_BFO_0        6  +#define I915_DYNAMIC_BFO_1        7 +#define I915_DYNAMIC_STP_0        8  +#define I915_DYNAMIC_STP_1        9  +#define I915_DYNAMIC_SC_ENA_0     10  +#define I915_DYNAMIC_SC_RECT_0    11  +#define I915_DYNAMIC_SC_RECT_1    12  +#define I915_DYNAMIC_SC_RECT_2    13  +#define I915_MAX_DYNAMIC          14 + + +#define I915_IMMEDIATE_S0         0 +#define I915_IMMEDIATE_S1         1 +#define I915_IMMEDIATE_S2         2 +#define I915_IMMEDIATE_S3         3 +#define I915_IMMEDIATE_S4         4 +#define I915_IMMEDIATE_S5         5 +#define I915_IMMEDIATE_S6         6 +#define I915_IMMEDIATE_S7         7 +#define I915_MAX_IMMEDIATE        8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC         0  +#define I915_CACHE_DYNAMIC        1 /* handled specially */ +#define I915_CACHE_SAMPLER        2 +#define I915_CACHE_MAP            3 +#define I915_CACHE_PROGRAM        4 +#define I915_CACHE_CONSTANTS      5 +#define I915_MAX_CACHE            6 + +#define I915_MAX_CONSTANT  32 + + +/** See constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ +   struct pipe_shader_state state; + +   struct tgsi_shader_info info; + +   uint *program; +   uint program_len; + +   /** +    * constants introduced during translation. +    * These are placed at the end of the constant buffer and grow toward +    * the beginning (eg: slot 31, 30 29, ...) +    * User-provided constants start at 0. +    * This allows both types of constants to co-exist (until there's too many) +    * and doesn't require regenerating/changing the fragment program to +    * shuffle constants around. 
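+    *
+    * For example: with three user constants and two translation-time
+    * immediates, slots 0, 1 and 2 hold the user values while slots 31
+    * and 30 hold the immediates; the two regions only collide once
+    * their combined count exceeds I915_MAX_CONSTANT (32).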
+    */ +   uint num_constants; +   float constants[I915_MAX_CONSTANT][4]; + +   /** +    * Status of each constant +    * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding +    * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) +    * Else, the bitmask indicates which components are occupied by immediates. +    */ +   ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state.   + */ +struct i915_state  +{ +   unsigned immediate[I915_MAX_IMMEDIATE]; +   unsigned dynamic[I915_MAX_DYNAMIC]; + +   float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; +   /** number of constants passed in through a constant buffer */ +   uint num_user_constants[PIPE_SHADER_TYPES]; + +   /* texture sampler state */ +   unsigned sampler[I915_TEX_UNITS][3]; +   unsigned sampler_enable_flags; +   unsigned sampler_enable_nr; + +   /* texture image buffers */ +   unsigned texbuffer[I915_TEX_UNITS][2]; + +   /** Describes the current hardware vertex layout */ +   struct vertex_info vertex_info; +    +   unsigned id;			/* track lost context events */ +}; + +struct i915_blend_state { +   unsigned iab; +   unsigned modes4; +   unsigned LIS5; +   unsigned LIS6; +}; + +struct i915_depth_stencil_state { +   unsigned stencil_modes4; +   unsigned bfo[2]; +   unsigned stencil_LIS5; +   unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { +   int light_twoside : 1; +   unsigned st; +   enum interp_mode color_interp; + +   unsigned LIS4; +   unsigned LIS7; +   unsigned sc[1]; + +   const struct pipe_rasterizer_state *templ; + +   union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { +   unsigned state[3]; +   const struct pipe_sampler_state *templ; +   unsigned minlod; +   unsigned maxlod; +}; + + +struct i915_texture { +   struct pipe_texture base; + +   /* Derived from the above: +    */ +   unsigned stride; +   unsigned depth_stride;          /* per-image on i945? */ +   unsigned total_nblocksy; + +   unsigned tiled; + +   unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + +   /* Explicitly store the offset of each image for each cube face or +    * depth value.  
Pretty much have to accept that hardware formats +    * are going to be so diverse that there is no unified way to +    * compute the offsets of depth/cube images within a mipmap level, +    * so have to store them as a lookup table: +    */ +   unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];   /**< array [depth] of offsets */ + +   /* The data is held here: +    */ +   struct pipe_buffer *buffer; +}; + +struct i915_batchbuffer; + +struct i915_context +{ +   struct pipe_context pipe; +   struct i915_winsys *winsys; +   struct draw_context *draw; + +   /* The most recent drawing state as set by the driver: +    */ +   const struct i915_blend_state           *blend; +   const struct i915_sampler_state         *sampler[PIPE_MAX_SAMPLERS]; +   const struct i915_depth_stencil_state   *depth_stencil; +   const struct i915_rasterizer_state      *rasterizer; + +   struct i915_fragment_shader *fs; + +   struct pipe_blend_color blend_color; +   struct pipe_clip_state clip; +   struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; +   struct pipe_framebuffer_state framebuffer; +   struct pipe_poly_stipple poly_stipple; +   struct pipe_scissor_state scissor; +   struct i915_texture *texture[PIPE_MAX_SAMPLERS]; +   struct pipe_viewport_state viewport; +   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + +   unsigned dirty; + +   unsigned num_samplers; +   unsigned num_textures; +   unsigned num_vertex_elements; +   unsigned num_vertex_buffers; + +   struct i915_batchbuffer *batch; + +   /** Vertex buffer */ +   struct pipe_buffer *vbo; +   size_t vbo_offset; +   unsigned vbo_flushed; + +   struct i915_state current; +   unsigned hardware_dirty; +    +   unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT      0x1 +#define I915_NEW_RASTERIZER    0x2 +#define I915_NEW_FS            0x4 +#define I915_NEW_BLEND         0x8 +#define I915_NEW_CLIP          0x10 +#define I915_NEW_SCISSOR       0x20 +#define I915_NEW_STIPPLE       0x40 +#define I915_NEW_FRAMEBUFFER   0x80 +#define I915_NEW_ALPHA_TEST    0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER       0x400 +#define I915_NEW_TEXTURE       0x800 +#define I915_NEW_CONSTANTS     0x1000 +#define I915_NEW_VBO           0x2000 +#define I915_NEW_VS            0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT    0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC            (1<<I915_CACHE_STATIC) +#define I915_HW_DYNAMIC           (1<<I915_CACHE_DYNAMIC) +#define I915_HW_SAMPLER           (1<<I915_CACHE_SAMPLER) +#define I915_HW_MAP               (1<<I915_CACHE_MAP) +#define I915_HW_PROGRAM           (1<<I915_CACHE_PROGRAM) +#define I915_HW_CONSTANTS         (1<<I915_CACHE_CONSTANTS) +#define I915_HW_IMMEDIATE         (1<<(I915_MAX_CACHE+0)) +#define I915_HW_INVARIENT         (1<<(I915_MAX_CACHE+1)) + + +/*********************************************************************** + * i915_prim_emit.c:  + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_prim_vbuf.c:  + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_state_emit.c:  + */ +void i915_emit_hardware_state(struct i915_context *i915 ); + + + +/*********************************************************************** + * i915_clear.c:  
+ */ +void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		unsigned clearValue); + + +/*********************************************************************** + * i915_surface.c:  + */ +void i915_init_surface_functions( struct i915_context *i915 ); + +void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_flush_functions( struct i915_context *i915 ); +void i915_init_string_functions( struct i915_context *i915 ); + + + + +/*********************************************************************** + * Inline conversion functions.  These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context( struct pipe_context *pipe ) +{ +   return (struct i915_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c new file mode 100644 index 0000000000..a300b61c3b --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -0,0 +1,899 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_debug.h" +#include "i915_batch.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_debug.h" + + +static void +PRINTF( +   struct debug_stream  *stream, +   const char           *fmt, +                        ... 
) +{ +   va_list  args; + +   va_start( args, fmt ); +   debug_vprintf( fmt, args ); +   va_end( args ); +} + + +static boolean debug( struct debug_stream *stream, const char *name, unsigned len ) +{ +   unsigned i; +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +    +   if (len == 0) { +      PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); +      assert(0); +      return FALSE; +   } + +   if (stream->print_addresses) +      PRINTF(stream, "%08x:  ", stream->offset); + + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   for (i = 0; i < len; i++) +      PRINTF(stream, "\t0x%08x\n",  ptr[i]);    +   PRINTF(stream, "\n"); + +   stream->offset += len * sizeof(unsigned); +    +   return TRUE; +} + + +static const char *get_prim_name( unsigned val ) +{ +   switch (val & PRIM3D_MASK) { +   case PRIM3D_TRILIST: return "TRILIST"; break; +   case PRIM3D_TRISTRIP: return "TRISTRIP"; break; +   case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; +   case PRIM3D_TRIFAN: return "TRIFAN"; break; +   case PRIM3D_POLY: return "POLY"; break; +   case PRIM3D_LINELIST: return "LINELIST"; break; +   case PRIM3D_LINESTRIP: return "LINESTRIP"; break; +   case PRIM3D_RECTLIST: return "RECTLIST"; break; +   case PRIM3D_POINTLIST: return "POINTLIST"; break; +   case PRIM3D_DIB: return "DIB"; break; +   case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; +   case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; +   default: return "????"; break; +   } +} + +static boolean debug_prim( struct debug_stream *stream, const char *name,  +			     boolean dump_floats, +			     unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   const char *prim = get_prim_name( ptr[0] ); +   unsigned i; +    + + +   PRINTF(stream, "%s %s (%d dwords):\n", name, prim, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[0]);    +   for (i = 1; i < len; i++) { +      if (dump_floats) +	 PRINTF(stream, "\t0x%08x // %f\n",  ptr[i], *(float *)&ptr[i]);    +      else +	 PRINTF(stream, "\t0x%08x\n",  ptr[i]);    +   } + +       +   PRINTF(stream, "\n"); + +   stream->offset += len * sizeof(unsigned); +    +   return TRUE; +} +    + + + +static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + +   if (len == 0) { +      PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); +      assert(0); +      return FALSE; +   } + +   if (stream->print_addresses) +      PRINTF(stream, "%08x:  ", stream->offset); + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   i915_disassemble_program( stream, ptr, len ); + +   stream->offset += len * sizeof(unsigned); +   return TRUE; +} + + +static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   unsigned old_offset = stream->offset + len * sizeof(unsigned); +   unsigned i; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   for (i = 0; i < len; i++) +      PRINTF(stream, "\t0x%08x\n",  ptr[i]); + +   stream->offset = ptr[1] & ~0x3; +    +   if (stream->offset < old_offset) +      PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n",  +		   old_offset, stream->offset ); +   else +      PRINTF(stream, "\n... 
skipping from 0x%x --> 0x%x ...\n\n",  +		   old_offset, stream->offset ); + + +   return TRUE; +} + + +static boolean debug_variable_length_prim( struct debug_stream *stream ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   const char *prim = get_prim_name( ptr[0] ); +   unsigned i, len; + +   ushort *idx = (ushort *)(ptr+1); +   for (i = 0; idx[i] != 0xffff; i++) +      ; + +   len = 1+(i+2)/2; + +   PRINTF(stream, "3DPRIM, %s variable length %d indicies (%d dwords):\n", prim, i, len); +   for (i = 0; i < len; i++) +      PRINTF(stream, "\t0x%08x\n",  ptr[i]); +   PRINTF(stream, "\n"); + +   stream->offset += len * sizeof(unsigned); +   return TRUE; +} + + +static void +BITS( +   struct debug_stream  *stream, +   unsigned             dw, +   unsigned             hi, +   unsigned             lo, +   const char           *fmt, +                        ... ) +{ +   va_list  args; +   unsigned himask = ~0UL >> (31 - (hi)); + +   PRINTF(stream, "\t\t "); + +   va_start( args, fmt ); +   debug_vprintf( fmt, args ); +   va_end( args ); + +   PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); +} + +#ifdef DEBUG +#define MBZ( dw, hi, lo) do {							\ +   unsigned x = (dw) >> (lo);				\ +   unsigned lomask = (1 << (lo)) - 1;			\ +   unsigned himask;					\ +   himask = (1UL << (hi)) - 1;				\ +   assert ((x & himask & ~lomask) == 0);	\ +} while (0) +#else +#define MBZ( dw, hi, lo) do {							\ +} while (0) +#endif + +static void +FLAG( +   struct debug_stream  *stream, +   unsigned             dw, +   unsigned             bit, +   const char           *fmt, +                        ... ) +{ +   if (((dw) >> (bit)) & 1) { +      va_list  args; + +      PRINTF(stream, "\t\t "); + +      va_start( args, fmt ); +      debug_vprintf( fmt, args ); +      va_end( args ); + +      PRINTF(stream, "\n"); +   } +} + +static boolean debug_load_immediate( struct debug_stream *stream, +				       const char *name, +				       unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   unsigned bits = (ptr[0] >> 4) & 0xff; +   unsigned j = 0; +    +   PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); + +   if (bits & (1<<0)) { +      PRINTF(stream, "\t  LIS0: 0x%08x\n", ptr[j]); +      PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); +      BITS(stream, ptr[j], 0, 0, "vb invalidate disable"); +      j++; +   } +   if (bits & (1<<1)) { +      PRINTF(stream, "\t  LIS1: 0x%08x\n", ptr[j]); +      BITS(stream, ptr[j], 29, 24, "vb dword width"); +      BITS(stream, ptr[j], 21, 16, "vb dword pitch"); +      BITS(stream, ptr[j], 15, 0, "vb max index"); +      j++; +   } +   if (bits & (1<<2)) { +      int i; +      PRINTF(stream, "\t  LIS2: 0x%08x\n", ptr[j]); +      for (i = 0; i < 8; i++) { +	 unsigned tc = (ptr[j] >> (i * 4)) & 0xf; +	 if (tc != 0xf) +	    BITS(stream, tc, 3, 0, "tex coord %d", i); +      } +      j++; +   } +   if (bits & (1<<3)) { +      PRINTF(stream, "\t  LIS3: 0x%08x\n", ptr[j]); +      j++; +   } +   if (bits & (1<<4)) { +      PRINTF(stream, "\t  LIS4: 0x%08x\n", ptr[j]); +      BITS(stream, ptr[j], 31, 23, "point width"); +      BITS(stream, ptr[j], 22, 19, "line width"); +      FLAG(stream, ptr[j], 18, "alpha flatshade"); +      FLAG(stream, ptr[j], 17, "fog flatshade"); +      FLAG(stream, ptr[j], 16, "spec flatshade"); +      FLAG(stream, ptr[j], 15, "rgb flatshade"); +      BITS(stream, ptr[j], 14, 13, "cull mode"); +      FLAG(stream, ptr[j], 12, "vfmt: point width"); 
+      FLAG(stream, ptr[j], 11, "vfmt: specular/fog"); +      FLAG(stream, ptr[j], 10, "vfmt: rgba"); +      FLAG(stream, ptr[j], 9, "vfmt: depth offset"); +      BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)"); +      FLAG(stream, ptr[j], 5, "force dflt diffuse"); +      FLAG(stream, ptr[j], 4, "force dflt specular"); +      FLAG(stream, ptr[j], 3, "local depth offset enable"); +      FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord"); +      FLAG(stream, ptr[j], 1, "sprite point"); +      FLAG(stream, ptr[j], 0, "antialiasing"); +      j++; +   } +   if (bits & (1<<5)) { +      PRINTF(stream, "\t  LIS5: 0x%08x\n", ptr[j]); +      BITS(stream, ptr[j], 31, 28, "rgba write disables"); +      FLAG(stream, ptr[j], 27,     "force dflt point width"); +      FLAG(stream, ptr[j], 26,     "last pixel enable"); +      FLAG(stream, ptr[j], 25,     "global z offset enable"); +      FLAG(stream, ptr[j], 24,     "fog enable"); +      BITS(stream, ptr[j], 23, 16, "stencil ref"); +      BITS(stream, ptr[j], 15, 13, "stencil test"); +      BITS(stream, ptr[j], 12, 10, "stencil fail op"); +      BITS(stream, ptr[j], 9, 7,   "stencil pass z fail op"); +      BITS(stream, ptr[j], 6, 4,   "stencil pass z pass op"); +      FLAG(stream, ptr[j], 3,      "stencil write enable"); +      FLAG(stream, ptr[j], 2,      "stencil test enable"); +      FLAG(stream, ptr[j], 1,      "color dither enable"); +      FLAG(stream, ptr[j], 0,      "logiop enable"); +      j++; +   } +   if (bits & (1<<6)) { +      PRINTF(stream, "\t  LIS6: 0x%08x\n", ptr[j]); +      FLAG(stream, ptr[j], 31,      "alpha test enable"); +      BITS(stream, ptr[j], 30, 28,  "alpha func"); +      BITS(stream, ptr[j], 27, 20,  "alpha ref"); +      FLAG(stream, ptr[j], 19,      "depth test enable"); +      BITS(stream, ptr[j], 18, 16,  "depth func"); +      FLAG(stream, ptr[j], 15,      "blend enable"); +      BITS(stream, ptr[j], 14, 12,  "blend func"); +      BITS(stream, ptr[j], 11, 8,   "blend src factor"); +      BITS(stream, ptr[j], 7,  4,   "blend dst factor"); +      FLAG(stream, ptr[j], 3,       "depth write enable"); +      FLAG(stream, ptr[j], 2,       "color write enable"); +      BITS(stream, ptr[j], 1,  0,   "provoking vertex");  +      j++; +   } + + +   PRINTF(stream, "\n"); + +   assert(j == len); + +   stream->offset += len * sizeof(unsigned); +    +   return TRUE; +} +  + + +static boolean debug_load_indirect( struct debug_stream *stream, +				      const char *name, +				      unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   unsigned bits = (ptr[0] >> 8) & 0x3f; +   unsigned i, j = 0; +    +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); + +   for (i = 0; i < 6; i++) { +      if (bits & (1<<i)) { +	 switch (1<<(8+i)) { +	 case LI0_STATE_STATIC_INDIRECT: +	    PRINTF(stream, "        STATIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; +	    PRINTF(stream, "                0x%08x\n", ptr[j++]); +	    break; +	 case LI0_STATE_DYNAMIC_INDIRECT: +	    PRINTF(stream, "       DYNAMIC: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; +	    break; +	 case LI0_STATE_SAMPLER: +	    PRINTF(stream, "       SAMPLER: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; +	    PRINTF(stream, "                0x%08x\n", ptr[j++]); +	    break; +	 case LI0_STATE_MAP: +	    PRINTF(stream, "           MAP: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; +	    PRINTF(stream, "                0x%08x\n", ptr[j++]); +	    break; +	 case LI0_STATE_PROGRAM: +	    PRINTF(stream, "       
PROGRAM: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; +	    PRINTF(stream, "                0x%08x\n", ptr[j++]); +	    break; +	 case LI0_STATE_CONSTANTS: +	    PRINTF(stream, "     CONSTANTS: 0x%08x | %x\n", ptr[j]&~3, ptr[j]&3); j++; +	    PRINTF(stream, "                0x%08x\n", ptr[j++]); +	    break; +	 default: +	    assert(0); +	    break; +	 } +      } +   } + +   if (bits == 0) { +      PRINTF(stream, "\t  DUMMY: 0x%08x\n", ptr[j++]); +   } + +   PRINTF(stream, "\n"); + + +   assert(j == len); + +   stream->offset += len * sizeof(unsigned); +    +   return TRUE; +} + 	 +static void BR13( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x\n",  val); +   FLAG(stream, val, 30, "clipping enable"); +   BITS(stream, val, 25, 24, "color depth (3==32bpp)"); +   BITS(stream, val, 23, 16, "raster op"); +   BITS(stream, val, 15, 0,  "dest pitch"); +} + + +static void BR22( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x\n",  val); +   BITS(stream, val, 31, 16, "dest y1"); +   BITS(stream, val, 15, 0,  "dest x1"); +} + +static void BR23( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x\n",  val); +   BITS(stream, val, 31, 16, "dest y2"); +   BITS(stream, val, 15, 0,  "dest x2"); +} + +static void BR09( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x -- dest address\n",  val); +} + +static void BR26( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x\n",  val); +   BITS(stream, val, 31, 16, "src y1"); +   BITS(stream, val, 15, 0,  "src x1"); +} + +static void BR11( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x\n",  val); +   BITS(stream, val, 15, 0,  "src pitch"); +} + +static void BR12( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x -- src address\n",  val); +} + +static void BR16( struct debug_stream *stream, +		  unsigned val ) +{ +   PRINTF(stream, "\t0x%08x -- color\n",  val); +} +    +static boolean debug_copy_blit( struct debug_stream *stream, +				  const char *name, +				  unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   int j = 0; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); +    +   BR13(stream, ptr[j++]); +   BR22(stream, ptr[j++]); +   BR23(stream, ptr[j++]); +   BR09(stream, ptr[j++]); +   BR26(stream, ptr[j++]); +   BR11(stream, ptr[j++]); +   BR12(stream, ptr[j++]); + +   stream->offset += len * sizeof(unsigned); +   assert(j == len); +   return TRUE; +} + +static boolean debug_color_blit( struct debug_stream *stream, +				  const char *name, +				  unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   int j = 0; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); + +   BR13(stream, ptr[j++]); +   BR22(stream, ptr[j++]); +   BR23(stream, ptr[j++]); +   BR09(stream, ptr[j++]); +   BR16(stream, ptr[j++]); + +   stream->offset += len * sizeof(unsigned); +   assert(j == len); +   return TRUE; +} + +static boolean debug_modes4( struct debug_stream *stream, +				  const char *name, +				  unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   int j = 0; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j]); +   BITS(stream, ptr[j], 21, 18, "logicop func"); +   FLAG(stream, ptr[j], 17, "stencil test mask modify-enable"); + 
  FLAG(stream, ptr[j], 16, "stencil write mask modify-enable"); +   BITS(stream, ptr[j], 15, 8, "stencil test mask"); +   BITS(stream, ptr[j], 7, 0,  "stencil write mask"); +   j++; + +   stream->offset += len * sizeof(unsigned); +   assert(j == len); +   return TRUE; +} + +static boolean debug_map_state( struct debug_stream *stream, +				  const char *name, +				  unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   unsigned j = 0; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); +    +   { +      PRINTF(stream, "\t0x%08x\n",  ptr[j]); +      BITS(stream, ptr[j], 15, 0,   "map mask"); +      j++; +   } + +   while (j < len) { +      { +	 PRINTF(stream, "\t  TMn.0: 0x%08x\n", ptr[j]); +	 PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); +	 FLAG(stream, ptr[j], 1, "vertical line stride"); +	 FLAG(stream, ptr[j], 0, "vertical line stride offset"); +	 j++; +      } + +      { +	 PRINTF(stream, "\t  TMn.1: 0x%08x\n", ptr[j]); +	 BITS(stream, ptr[j], 31, 21, "height"); +	 BITS(stream, ptr[j], 20, 10, "width"); +	 BITS(stream, ptr[j], 9, 7, "surface format"); +	 BITS(stream, ptr[j], 6, 3, "texel format"); +	 FLAG(stream, ptr[j], 2, "use fence regs"); +	 FLAG(stream, ptr[j], 1, "tiled surface"); +	 FLAG(stream, ptr[j], 0, "tile walk ymajor"); +	 j++; +      } +      { +	 PRINTF(stream, "\t  TMn.2: 0x%08x\n", ptr[j]); +	 BITS(stream, ptr[j], 31, 21, "dword pitch"); +	 BITS(stream, ptr[j], 20, 15, "cube face enables"); +	 BITS(stream, ptr[j], 14, 9, "max lod"); +	 FLAG(stream, ptr[j], 8,     "mip layout right"); +	 BITS(stream, ptr[j], 7, 0, "depth"); +	 j++; +      } +   } + +   stream->offset += len * sizeof(unsigned); +   assert(j == len); +   return TRUE; +} + +static boolean debug_sampler_state( struct debug_stream *stream, +				  const char *name, +				  unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   unsigned j = 0; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); +    +   { +      PRINTF(stream, "\t0x%08x\n",  ptr[j]); +      BITS(stream, ptr[j], 15, 0,   "sampler mask"); +      j++; +   } + +   while (j < len) { +      { +	 PRINTF(stream, "\t  TSn.0: 0x%08x\n", ptr[j]); +	 FLAG(stream, ptr[j], 31, "reverse gamma"); +	 FLAG(stream, ptr[j], 30, "planar to packed"); +	 FLAG(stream, ptr[j], 29, "yuv->rgb"); +	 BITS(stream, ptr[j], 28, 27, "chromakey index"); +	 BITS(stream, ptr[j], 26, 22, "base mip level"); +	 BITS(stream, ptr[j], 21, 20, "mip mode filter"); +	 BITS(stream, ptr[j], 19, 17, "mag mode filter"); +	 BITS(stream, ptr[j], 16, 14, "min mode filter"); +	 BITS(stream, ptr[j], 13, 5,  "lod bias (s4.4)"); +	 FLAG(stream, ptr[j], 4,      "shadow enable"); +	 FLAG(stream, ptr[j], 3,      "max-aniso-4"); +	 BITS(stream, ptr[j], 2, 0,   "shadow func"); +	 j++; +      } + +      { +	 PRINTF(stream, "\t  TSn.1: 0x%08x\n", ptr[j]); +	 BITS(stream, ptr[j], 31, 24, "min lod"); +	 MBZ( ptr[j], 23, 18 ); +	 FLAG(stream, ptr[j], 17,     "kill pixel enable"); +	 FLAG(stream, ptr[j], 16,     "keyed tex filter mode"); +	 FLAG(stream, ptr[j], 15,     "chromakey enable"); +	 BITS(stream, ptr[j], 14, 12, "tcx wrap mode"); +	 BITS(stream, ptr[j], 11, 9,  "tcy wrap mode"); +	 BITS(stream, ptr[j], 8,  6,  "tcz wrap mode"); +	 FLAG(stream, ptr[j], 5,      "normalized coords"); +	 BITS(stream, ptr[j], 4,  1,  "map (surface) index"); +	 FLAG(stream, ptr[j], 0,      "EAST deinterlacer enable"); +	 j++; +      } +      { +	 
PRINTF(stream, "\t  TSn.2: 0x%08x  (default color)\n", ptr[j]); +	 j++; +      } +   } + +   stream->offset += len * sizeof(unsigned); +   assert(j == len); +   return TRUE; +} + +static boolean debug_dest_vars( struct debug_stream *stream, +				  const char *name, +				  unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   int j = 0; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); + +   { +      PRINTF(stream, "\t0x%08x\n",  ptr[j]); +      FLAG(stream, ptr[j], 31,     "early classic ztest"); +      FLAG(stream, ptr[j], 30,     "opengl tex default color"); +      FLAG(stream, ptr[j], 29,     "bypass iz"); +      FLAG(stream, ptr[j], 28,     "lod preclamp"); +      BITS(stream, ptr[j], 27, 26, "dither pattern"); +      FLAG(stream, ptr[j], 25,     "linear gamma blend"); +      FLAG(stream, ptr[j], 24,     "debug dither"); +      BITS(stream, ptr[j], 23, 20, "dstorg x"); +      BITS(stream, ptr[j], 19, 16, "dstorg y"); +      MBZ (ptr[j], 15, 15 ); +      BITS(stream, ptr[j], 14, 12, "422 write select"); +      BITS(stream, ptr[j], 11, 8,  "cbuf format"); +      BITS(stream, ptr[j], 3, 2,   "zbuf format"); +      FLAG(stream, ptr[j], 1,      "vert line stride"); +      FLAG(stream, ptr[j], 1,      "vert line stride offset"); +      j++; +   } +    +   stream->offset += len * sizeof(unsigned); +   assert(j == len); +   return TRUE; +} + +static boolean debug_buf_info( struct debug_stream *stream, +				  const char *name, +				  unsigned len ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   int j = 0; + +   PRINTF(stream, "%s (%d dwords):\n", name, len); +   PRINTF(stream, "\t0x%08x\n",  ptr[j++]); + +   { +      PRINTF(stream, "\t0x%08x\n",  ptr[j]); +      BITS(stream, ptr[j], 28, 28, "aux buffer id"); +      BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); +      FLAG(stream, ptr[j], 23,     "use fence regs"); +      FLAG(stream, ptr[j], 22,     "tiled surface"); +      FLAG(stream, ptr[j], 21,     "tile walk ymajor"); +      MBZ (ptr[j], 20, 14); +      BITS(stream, ptr[j], 13, 2,  "dword pitch"); +      MBZ (ptr[j], 2,  0); +      j++; +   } +    +   PRINTF(stream, "\t0x%08x -- buffer base address\n",  ptr[j++]); + +   stream->offset += len * sizeof(unsigned); +   assert(j == len); +   return TRUE; +} + +static boolean i915_debug_packet( struct debug_stream *stream ) +{ +   unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); +   unsigned cmd = *ptr; +    +   switch (((cmd >> 29) & 0x7)) { +   case 0x0: +      switch ((cmd >> 23) & 0x3f) { +      case 0x0: +	 return debug(stream, "MI_NOOP", 1); +      case 0x3: +	 return debug(stream, "MI_WAIT_FOR_EVENT", 1); +      case 0x4: +	 return debug(stream, "MI_FLUSH", 1); +      case 0xA: +	 debug(stream, "MI_BATCH_BUFFER_END", 1); +	 return FALSE; +      case 0x22: +	 return debug(stream, "MI_LOAD_REGISTER_IMM", 3); +      case 0x31: +	 return debug_chain(stream, "MI_BATCH_BUFFER_START", 2); +      default: +         (void)debug(stream, "UNKNOWN 0x0 case!", 1); +         assert(0); +	 break; +      } +      break; +   case 0x1: +      (void) debug(stream, "UNKNOWN 0x1 case!", 1); +      assert(0); +      break; +   case 0x2: +      switch ((cmd >> 22) & 0xff) {	  +      case 0x50: +	 return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); +      case 0x53: +	 return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); +      default: +	 return debug(stream, "blit command", (cmd & 0xff) + 2); +      } 
+      break; +   case 0x3: +      switch ((cmd >> 24) & 0x1f) {	  +      case 0x6: +	 return debug(stream, "3DSTATE_ANTI_ALIASING", 1); +      case 0x7: +	 return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); +      case 0x8: +	 return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); +      case 0x9: +	 return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); +      case 0xb: +	 return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); +      case 0xc: +	 return debug(stream, "3DSTATE_MODES5", 1);	  +      case 0xd: +	 return debug_modes4(stream, "3DSTATE_MODES4", 1); +      case 0x15: +	 return debug(stream, "3DSTATE_FOG_COLOR", 1); +      case 0x16: +	 return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); +      case 0x1c: +	 /* 3DState16NP */ +	 switch((cmd >> 19) & 0x1f) { +	 case 0x10: +	    return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); +	 case 0x11: +	    return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); +	 default: +            (void) debug(stream, "UNKNOWN 0x1c case!", 1); +            assert(0); +	    break; +	 } +	 break; +      case 0x1d: +	 /* 3DStateMW */ +	 switch ((cmd >> 16) & 0xff) { +	 case 0x0: +	    return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); +	 case 0x1: +	    return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); +	 case 0x4: +	    return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); +	 case 0x5: +	    return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); +	 case 0x6: +	    return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); +	 case 0x7: +	    return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); +	 case 0x80: +	    return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); +	 case 0x81: +	    return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); +	 case 0x83: +	    return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); +	 case 0x85: +	    return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); +	 case 0x88: +	    return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); +	 case 0x89: +	    return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); +	 case 0x8e: +	    return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); +	 case 0x97: +	    return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); +	 case 0x98: +	    return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); +	 case 0x99: +	    return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); +	 case 0x9a: +	    return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); +	 case 0x9c: +	    return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); +	 default: +	    assert(0); +	    return 0; +	 } +	 break; +      case 0x1e: +	 if (cmd & (1 << 23)) +	    return debug(stream, "???", (cmd & 0xffff) + 1); +	 else +	    return debug(stream, "", 1); +	 break; +      case 0x1f: +	 if ((cmd & (1 << 23)) == 0)	 +	    return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); +	 else if (cmd & (1 << 17))  +	 { +	    if ((cmd & 0xffff) == 0) +	       return debug_variable_length_prim(stream); +	    else +	       return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); +	 } +	 else +	    return debug_prim(stream, "3DPRIM  (indirect sequential)", 0, 2);  +	 break; +      default: +	 return debug(stream, "", 0); +      } +   default: +      assert(0); +  
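+      /* An unrecognized client type in bits 31:29 means the packet
+       * length cannot be determined, so give up on the rest of the
+       * batch rather than guess.
+       */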
    return 0; +   } + +   assert(0); +   return 0; +} + + + +void +i915_dump_batchbuffer( struct i915_batchbuffer *batch ) +{ +   struct debug_stream stream; +   unsigned *start = (unsigned*)batch->map; +   unsigned *end = (unsigned*)batch->ptr; +   unsigned long bytes = (unsigned long) (end - start) * 4; +   boolean done = FALSE; + +   stream.offset = 0; +   stream.ptr = (char *)start; +   stream.print_addresses = 0; + +   if (!start || !end) { +      debug_printf( "\n\nBATCH: ???\n"); +      return; +   } +    +   debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); + +   while (!done && +	  stream.offset < bytes) +   { +      if (!i915_debug_packet( &stream )) +	 break; + +      assert(stream.offset <= bytes && +	     stream.offset >= 0); +   } + +   debug_printf( "END-BATCH\n\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h new file mode 100644 index 0000000000..16ca7277c7 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -0,0 +1,114 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_DEBUG_H +#define I915_DEBUG_H + +#include <stdarg.h> + +struct i915_context; + +struct debug_stream  +{ +   unsigned offset;		/* current gtt offset */ +   char *ptr;		/* pointer to gtt offset zero */ +   char *end;		/* pointer to gtt offset zero */ +   unsigned print_addresses; +}; + + +/* Internal functions + */ +void i915_disassemble_program(struct debug_stream *stream,  +			      const unsigned *program, unsigned sz); + +void i915_print_ureg(const char *msg, unsigned ureg); + + +#define DEBUG_BATCH	 0x1 +#define DEBUG_BLIT       0x2 +#define DEBUG_BUFFER     0x4 +#define DEBUG_CONSTANTS  0x8 +#define DEBUG_CONTEXT    0x10 +#define DEBUG_DRAW	 0x20 +#define DEBUG_DYNAMIC	 0x40 +#define DEBUG_FLUSH      0x80 +#define DEBUG_MAP	 0x100 +#define DEBUG_PROGRAM	 0x200 +#define DEBUG_REGIONS    0x400 +#define DEBUG_SAMPLER	 0x800 +#define DEBUG_STATIC	 0x1000 +#define DEBUG_SURFACE    0x2000 +#define DEBUG_WINSYS     0x4000 + +#include "pipe/p_compiler.h" + +#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) + +#include "pipe/internal/p_winsys_screen.h" + +static INLINE void +I915_DBG( +   struct i915_context  *i915, +   const char           *fmt, +                        ... ) +{ +   if ((i915)->debug & FILE_DEBUG_FLAG) { +      va_list  args; + +      va_start( args, fmt ); +      debug_vprintf( fmt, args ); +      va_end( args ); +   } +} + +#else + +static INLINE void +I915_DBG( +   struct i915_context  *i915, +   const char           *fmt, +                        ... ) +{ +   (void) i915; +   (void) fmt; +} + +#endif + + +struct i915_batchbuffer; + +void i915_dump_batchbuffer( struct i915_batchbuffer *i915 ); + +void i915_debug_init( struct i915_context *i915 ); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c new file mode 100644 index 0000000000..9c5b117b6d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -0,0 +1,363 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_debug.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_memory.h" + + +static void +PRINTF( +   struct debug_stream  *stream, +   const char           *fmt, +                        ... ) +{ +   va_list  args; + +   va_start( args, fmt ); +   debug_vprintf( fmt, args ); +   va_end( args ); +} + + +static const char *opcodes[0x20] = { +   "NOP", +   "ADD", +   "MOV", +   "MUL", +   "MAD", +   "DP2ADD", +   "DP3", +   "DP4", +   "FRC", +   "RCP", +   "RSQ", +   "EXP", +   "LOG", +   "CMP", +   "MIN", +   "MAX", +   "FLR", +   "MOD", +   "TRC", +   "SGE", +   "SLT", +   "TEXLD", +   "TEXLDP", +   "TEXLDB", +   "TEXKILL", +   "DCL", +   "0x1a", +   "0x1b", +   "0x1c", +   "0x1d", +   "0x1e", +   "0x1f", +}; + + +static const int args[0x20] = { +   0,                           /* 0 nop */ +   2,                           /* 1 add */ +   1,                           /* 2 mov */ +   2,                           /* 3 m ul */ +   3,                           /* 4 mad */ +   3,                           /* 5 dp2add */ +   2,                           /* 6 dp3 */ +   2,                           /* 7 dp4 */ +   1,                           /* 8 frc */ +   1,                           /* 9 rcp */ +   1,                           /* a rsq */ +   1,                           /* b exp */ +   1,                           /* c log */ +   3,                           /* d cmp */ +   2,                           /* e min */ +   2,                           /* f max */ +   1,                           /* 10 flr */ +   1,                           /* 11 mod */ +   1,                           /* 12 trc */ +   2,                           /* 13 sge */ +   2,                           /* 14 slt */ +   1, +   1, +   1, +   1, +   0, +   0, +   0, +   0, +   0, +   0, +   0, +}; + + +static const char *regname[0x8] = { +   "R", +   "T", +   "CONST", +   "S", +   "OC", +   "OD", +   "U", +   "UNKNOWN", +}; + +static void +print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr) +{ +   switch (type) { +   case REG_TYPE_T: +      switch (nr) { +      case T_DIFFUSE: +         PRINTF(stream, "T_DIFFUSE"); +         return; +      case T_SPECULAR: +         PRINTF(stream, "T_SPECULAR"); +         return; +      case T_FOG_W: +         PRINTF(stream, "T_FOG_W"); +         return; +      default: +         PRINTF(stream, "T_TEX%d", nr); +         return; +      } +   case REG_TYPE_OC: +      if (nr == 0) { +         PRINTF(stream, "oC"); +         return; +      } +      break; +   case REG_TYPE_OD: +      if (nr == 0) { +         PRINTF(stream, "oD"); +         return; +      } +      break; +   default: +      break; +   } + +   PRINTF(stream, "%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + +#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\ +		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\ +		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\ +		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg) +{ +   int i; + +   if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && +       (reg & REG_NEGATE_MASK) == 0) +      return; + +   PRINTF(stream, "."); + +   for (i = 3; i >= 0; i--) { +      if (reg & (1 << ((i * 4) + 3))) +         PRINTF(stream, "-"); + +      switch ((reg >> (i * 4)) & 0x7) { +      case 0: +         PRINTF(stream, 
"x"); +         break; +      case 1: +         PRINTF(stream, "y"); +         break; +      case 2: +         PRINTF(stream, "z"); +         break; +      case 3: +         PRINTF(stream, "w"); +         break; +      case 4: +         PRINTF(stream, "0"); +         break; +      case 5: +         PRINTF(stream, "1"); +         break; +      default: +         PRINTF(stream, "?"); +         break; +      } +   } +} + + +static void +print_src_reg(struct debug_stream *stream, unsigned dword) +{ +   unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; +   unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; +   print_reg_type_nr(stream, type, nr); +   print_reg_neg_swizzle(stream, dword); +} + + +static void +print_dest_reg(struct debug_stream *stream, unsigned dword) +{ +   unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; +   unsigned type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; +   print_reg_type_nr(stream, type, nr); +   if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) +      return; +   PRINTF(stream, "."); +   if (dword & A0_DEST_CHANNEL_X) +      PRINTF(stream, "x"); +   if (dword & A0_DEST_CHANNEL_Y) +      PRINTF(stream, "y"); +   if (dword & A0_DEST_CHANNEL_Z) +      PRINTF(stream, "z"); +   if (dword & A0_DEST_CHANNEL_W) +      PRINTF(stream, "w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r)      (r) + + +static void +print_arith_op(struct debug_stream *stream,  +	       unsigned opcode, const unsigned * program) +{ +   if (opcode != A0_NOP) { +      print_dest_reg(stream, program[0]); +      if (program[0] & A0_DEST_SATURATE) +         PRINTF(stream, " = SATURATE "); +      else +         PRINTF(stream, " = "); +   } + +   PRINTF(stream, "%s ", opcodes[opcode]); + +   print_src_reg(stream, GET_SRC0_REG(program[0], program[1])); +   if (args[opcode] == 1) { +      PRINTF(stream, "\n"); +      return; +   } + +   PRINTF(stream, ", "); +   print_src_reg(stream, GET_SRC1_REG(program[1], program[2])); +   if (args[opcode] == 2) { +      PRINTF(stream, "\n"); +      return; +   } + +   PRINTF(stream, ", "); +   print_src_reg(stream, GET_SRC2_REG(program[2])); +   PRINTF(stream, "\n"); +   return; +} + + +static void +print_tex_op(struct debug_stream *stream,  +	     unsigned opcode, const unsigned * program) +{ +   print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL); +   PRINTF(stream, " = "); + +   PRINTF(stream, "%s ", opcodes[opcode]); + +   PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + +   print_reg_type_nr(stream,  +		     (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & +                     REG_TYPE_MASK, +                     (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); +   PRINTF(stream, "\n"); +} + +static void +print_texkil_op(struct debug_stream *stream,  +                unsigned opcode, const unsigned * program) +{ +   PRINTF(stream, "TEXKIL "); + +   print_reg_type_nr(stream,  +		     (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & +                     REG_TYPE_MASK, +                     (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); +   PRINTF(stream, "\n"); +} + +static void +print_dcl_op(struct debug_stream *stream,  +	     unsigned opcode, const unsigned * program) +{ +   PRINTF(stream, "%s ", opcodes[opcode]); +   print_dest_reg(stream,  +		  program[0] | A0_DEST_CHANNEL_ALL); +   PRINTF(stream, "\n"); +} + + +void +i915_disassemble_program(struct debug_stream 
*stream,  +			 const unsigned * program, unsigned sz) +{ +   unsigned i; + +   PRINTF(stream, "\t\tBEGIN\n"); + +   assert((program[0] & 0x1ff) + 2 == sz); + +   program++; +   for (i = 1; i < sz; i += 3, program += 3) { +      unsigned opcode = program[0] & (0x1f << 24); + +      PRINTF(stream, "\t\t"); + +      if ((int) opcode >= A0_NOP && opcode <= A0_SLT) +         print_arith_op(stream, opcode >> 24, program); +      else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL) +         print_tex_op(stream, opcode >> 24, program); +      else if (opcode == T0_TEXKILL) +         print_texkil_op(stream, opcode >> 24, program); +      else if (opcode == D0_DCL) +         print_dcl_op(stream, opcode >> 24, program); +      else +         PRINTF(stream, "Unknown opcode 0x%x\n", opcode); +   } + +   PRINTF(stream, "\t\tEND\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c new file mode 100644 index 0000000000..472e0ab774 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -0,0 +1,78 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Author: + *    Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "draw/draw_context.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_batch.h" + + +static void i915_flush( struct pipe_context *pipe, +                        unsigned flags, +                        struct pipe_fence_handle **fence ) +{ +   struct i915_context *i915 = i915_context(pipe); + +   draw_flush(i915->draw); + +   /* Do we need to emit an MI_FLUSH command to flush the hardware +    * caches? 
+    */ +   if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { +      unsigned flush = MI_FLUSH; +       +      if (!(flags & PIPE_FLUSH_RENDER_CACHE)) +	 flush |= INHIBIT_FLUSH_RENDER_CACHE; + +      if (flags & PIPE_FLUSH_TEXTURE_CACHE) +	 flush |= FLUSH_MAP_CACHE; + +      if (!BEGIN_BATCH(1, 0)) { +	 FLUSH_BATCH(NULL); +	 assert(BEGIN_BATCH(1, 0)); +      } +      OUT_BATCH( flush ); +   } + +   /* If there are no flags, just flush pending commands to hardware: +    */ +   FLUSH_BATCH(fence); +   i915->vbo_flushed = 1; +} + + + +void i915_init_flush_functions( struct i915_context *i915 ) +{ +   i915->pipe.flush = i915_flush; +} diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h new file mode 100644 index 0000000000..2f0f99d046 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -0,0 +1,207 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 + + + +/** + * Program translation state + */ +struct i915_fp_compile { +   struct i915_fragment_shader *shader;  /* the shader we're compiling */ + +   boolean used_constants[I915_MAX_CONSTANT]; + +   /** maps TGSI immediate index to constant slot */ +   uint num_immediates; +   uint immediates_map[I915_MAX_CONSTANT]; +   float immediates[I915_MAX_CONSTANT][4]; + +   boolean first_instruction; + +   uint declarations[I915_PROGRAM_SIZE]; +   uint program[I915_PROGRAM_SIZE]; + +   uint *csr;            /**< Cursor, points into program. */ + +   uint *decl;           /**< Cursor, points into declarations. 
*/ + +   uint decl_s;          /**< flags for which s regs need to be decl'd */ +   uint decl_t;          /**< flags for which t regs need to be decl'd */ + +   uint temp_flag;       /**< Tracks temporary regs which are in use */ +   uint utemp_flag;      /**< Tracks TYPE_U temporary regs which are in use */ + +   uint nr_tex_indirect; +   uint nr_tex_insn; +   uint nr_alu_insn; +   uint nr_decl_insn; + +   boolean error;      /**< Set if i915_program_error() is called */ +   uint wpos_tex; +   uint NumNativeInstructions; +   uint NumNativeAluInstructions; +   uint NumNativeTexInstructions; +   uint NumNativeTexIndirections; +}; + + +/* Having zero and one in here makes the definition of swizzle a lot + * easier. + */ +#define UREG_TYPE_SHIFT               29 +#define UREG_NR_SHIFT                 24 +#define UREG_CHANNEL_X_NEGATE_SHIFT   23 +#define UREG_CHANNEL_X_SHIFT          20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT   19 +#define UREG_CHANNEL_Y_SHIFT          16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT   15 +#define UREG_CHANNEL_Z_SHIFT          12 +#define UREG_CHANNEL_W_NEGATE_SHIFT   11 +#define UREG_CHANNEL_W_SHIFT          8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ  5 +#define UREG_CHANNEL_ZERO_SHIFT       4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ   1 +#define UREG_CHANNEL_ONE_SHIFT        0 + +#define UREG_BAD          0xffffffff    /* not a valid ureg */ + +#define X    SRC_X +#define Y    SRC_Y +#define Z    SRC_Z +#define W    SRC_W +#define ZERO SRC_ZERO +#define ONE  SRC_ONE + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) |		\ +			  ((nr)  << UREG_NR_SHIFT) |		\ +			  (X     << UREG_CHANNEL_X_SHIFT) |	\ +			  (Y     << UREG_CHANNEL_Y_SHIFT) |	\ +			  (Z     << UREG_CHANNEL_Z_SHIFT) |	\ +			  (W     << UREG_CHANNEL_W_SHIFT) |	\ +			  (ZERO  << UREG_CHANNEL_ZERO_SHIFT) |	\ +			  (ONE   << UREG_CHANNEL_ONE_SHIFT)) + +#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20)) +#define CHANNEL_SRC( src, channel ) (src>>(channel*4)) + +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)®_TYPE_MASK) +#define GET_UREG_NR(reg)   (((reg)>>UREG_NR_SHIFT)®_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +/* One neat thing about the UREG representation:   + */ +static INLINE int +swizzle(int reg, uint x, uint y, uint z, uint w) +{ +   assert(x <= SRC_ONE); +   assert(y <= SRC_ONE); +   assert(z <= SRC_ONE); +   assert(w <= SRC_ONE); +   return ((reg & ~UREG_XYZW_CHANNEL_MASK) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | +           CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); +} + + + +/*********************************************************************** + * Public interface for the compiler + */ +extern void +i915_translate_fragment_program( struct i915_context *i915, +                                 struct i915_fragment_shader *fs); + + + +extern uint i915_get_temp(struct i915_fp_compile *p); +extern uint i915_get_utemp(struct i915_fp_compile *p); +extern void i915_release_utemps(struct i915_fp_compile *p); + + +extern uint i915_emit_texld(struct i915_fp_compile *p, +                              uint dest, +                              uint destmask, +                              uint sampler, uint coord, uint op); + +extern uint i915_emit_arith(struct i915_fp_compile *p, +                              uint op, +                              uint dest, +                              uint mask, +                     
         uint saturate, +                              uint src0, uint src1, uint src2); + +extern uint i915_emit_decl(struct i915_fp_compile *p, +                             uint type, uint nr, uint d0_flags); + + +extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); + +extern uint i915_emit_const2f(struct i915_fp_compile *p, +                                float c0, float c1); + +extern uint i915_emit_const4fv(struct i915_fp_compile *p, +                                 const float * c); + +extern uint i915_emit_const4f(struct i915_fp_compile *p, +                                float c0, float c1, +                                float c2, float c3); + + +/*====================================================================== + * i915_fpc_debug.c + */ +extern void i915_disassemble_program(const uint * program, uint sz); + + +/*====================================================================== + * i915_fpc_translate.c + */ + +extern void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c new file mode 100644 index 0000000000..b054ce41d3 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -0,0 +1,375 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" +#include "util/u_math.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) +#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) +#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. 
+ */ +#define T0_SAMPLER( reg )     (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) +#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ +			       (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) + + +/* Macros for translating UREG's into the various register fields used + * by the I915 programmable unit. + */ +#define UREG_A0_DEST_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) +#define UREG_A0_SRC0_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) +#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A1_SRC1_SHIFT_LEFT  (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) +#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A2_SRC2_SHIFT_LEFT  (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) + +#define UREG_MASK         0xffffff00 +#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ +  			   (REG_NR_MASK << UREG_NR_SHIFT)) + + +uint +i915_get_temp(struct i915_fp_compile *p) +{ +   int bit = ffs(~p->temp_flag); +   if (!bit) { +      i915_program_error(p, "i915_get_temp: out of temporaries\n"); +      return 0; +   } + +   p->temp_flag |= 1 << (bit - 1); +   return bit - 1; +} + + +static void +i915_release_temp(struct i915_fp_compile *p, int reg) +{ +   p->temp_flag &= ~(1 << reg); +} + + +/** + * Get unpreserved temporary, a temp whose value is not preserved between + * PS program phases. + */ +uint +i915_get_utemp(struct i915_fp_compile * p) +{ +   int bit = ffs(~p->utemp_flag); +   if (!bit) { +      i915_program_error(p, "i915_get_utemp: out of temporaries\n"); +      return 0; +   } + +   p->utemp_flag |= 1 << (bit - 1); +   return UREG(REG_TYPE_U, (bit - 1)); +} + +void +i915_release_utemps(struct i915_fp_compile *p) +{ +   p->utemp_flag = ~0x7; +} + + +uint +i915_emit_decl(struct i915_fp_compile *p, +               uint type, uint nr, uint d0_flags) +{ +   uint reg = UREG(type, nr); + +   if (type == REG_TYPE_T) { +      if (p->decl_t & (1 << nr)) +         return reg; + +      p->decl_t |= (1 << nr); +   } +   else if (type == REG_TYPE_S) { +      if (p->decl_s & (1 << nr)) +         return reg; + +      p->decl_s |= (1 << nr); +   } +   else +      return reg; + +   *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); +   *(p->decl++) = D1_MBZ; +   *(p->decl++) = D2_MBZ; + +   p->nr_decl_insn++; +   return reg; +} + +uint +i915_emit_arith(struct i915_fp_compile * p, +                uint op, +                uint dest, +                uint mask, +                uint saturate, uint src0, uint src1, uint src2) +{ +   uint c[3]; +   uint nr_const = 0; + +   assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); +   dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); +   assert(dest); + +   if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) +      c[nr_const++] = 0; +   if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) +      c[nr_const++] = 1; +   if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) +      c[nr_const++] = 2; + +   /* Recursively call this function to MOV additional const values +    * into temporary registers.  Use utemp registers for this - +    * currently shouldn't be possible to run out, but keep an eye on +    * this. 
+    */ +   if (nr_const > 1) { +      uint s[3], first, i, old_utemp_flag; + +      s[0] = src0; +      s[1] = src1; +      s[2] = src2; +      old_utemp_flag = p->utemp_flag; + +      first = GET_UREG_NR(s[c[0]]); +      for (i = 1; i < nr_const; i++) { +         if (GET_UREG_NR(s[c[i]]) != first) { +            uint tmp = i915_get_utemp(p); + +            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, +                            s[c[i]], 0, 0); +            s[c[i]] = tmp; +         } +      } + +      src0 = s[0]; +      src1 = s[1]; +      src2 = s[2]; +      p->utemp_flag = old_utemp_flag;   /* restore */ +   } + +   *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); +   *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); +   *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + +   p->nr_alu_insn++; +   return dest; +} + + +/** + * Emit a texture load or texkill instruction. + * \param dest  the dest i915 register + * \param destmask  the dest register writemask + * \param sampler  the i915 sampler register + * \param coord  the i915 source texcoord operand + * \param opcode  the instruction opcode + */ +uint i915_emit_texld( struct i915_fp_compile *p, +			uint dest, +			uint destmask, +			uint sampler, +			uint coord, +			uint opcode ) +{ +   const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); +   int temp = -1; + +   if (coord != k) { +      /* texcoord is swizzled or negated.  Need to allocate a new temporary +       * register (a utemp / unpreserved temp) won't do. +       */ +      uint tempReg; + +      temp = i915_get_temp(p);           /* get temp reg index */ +      tempReg = UREG(REG_TYPE_R, temp);  /* make i915 register */ + +      i915_emit_arith( p, A0_MOV, +                       tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ +                       0,                            /* saturate */ +                       coord, 0, 0 );                /* src0, src1, src2 */ + +      /* new src texcoord is tempReg */ +      coord = tempReg; +   } + +   /* Don't worry about saturate as we only support   +    */ +   if (destmask != A0_DEST_CHANNEL_ALL) { +      /* if not writing to XYZW... */ +      uint tmp = i915_get_utemp(p); +      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); +      i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); +      /* XXX release utemp here? */ +   } +   else { +      assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); +      assert(dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest))); + +      /* is the sampler coord a texcoord input reg? 
*/ +      if (GET_UREG_TYPE(coord) != REG_TYPE_T) { +	 p->nr_tex_indirect++; +      } + +      *(p->csr++) = (opcode |  +		     T0_DEST( dest ) | +		     T0_SAMPLER( sampler )); + +      *(p->csr++) = T1_ADDRESS_REG( coord ); +      *(p->csr++) = T2_MBZ; + +      p->nr_tex_insn++; +   } + +   if (temp >= 0) +      i915_release_temp(p, temp); + +   return dest; +} + + +uint +i915_emit_const1f(struct i915_fp_compile * p, float c0) +{ +   struct i915_fragment_shader *ifs = p->shader; +   unsigned reg, idx; + +   if (c0 == 0.0) +      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); +   if (c0 == 1.0) +      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + +   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { +      if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) +         continue; +      for (idx = 0; idx < 4; idx++) { +         if (!(ifs->constant_flags[reg] & (1 << idx)) || +             ifs->constants[reg][idx] == c0) { +            ifs->constants[reg][idx] = c0; +            ifs->constant_flags[reg] |= 1 << idx; +            if (reg + 1 > ifs->num_constants) +               ifs->num_constants = reg + 1; +            return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); +         } +      } +   } + +   i915_program_error(p, "i915_emit_const1f: out of constants\n"); +   return 0; +} + +uint +i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) +{ +   struct i915_fragment_shader *ifs = p->shader; +   unsigned reg, idx; + +   if (c0 == 0.0) +      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); +   if (c0 == 1.0) +      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + +   if (c1 == 0.0) +      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); +   if (c1 == 1.0) +      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + +   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { +      if (ifs->constant_flags[reg] == 0xf || +          ifs->constant_flags[reg] == I915_CONSTFLAG_USER) +         continue; +      for (idx = 0; idx < 3; idx++) { +         if (!(ifs->constant_flags[reg] & (3 << idx))) { +            ifs->constants[reg][idx + 0] = c0; +            ifs->constants[reg][idx + 1] = c1; +            ifs->constant_flags[reg] |= 3 << idx; +            if (reg + 1 > ifs->num_constants) +               ifs->num_constants = reg + 1; +            return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); +         } +      } +   } + +   i915_program_error(p, "i915_emit_const2f: out of constants\n"); +   return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, +                  float c0, float c1, float c2, float c3) +{ +   struct i915_fragment_shader *ifs = p->shader; +   unsigned reg; + +   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { +      if (ifs->constant_flags[reg] == 0xf && +          ifs->constants[reg][0] == c0 && +          ifs->constants[reg][1] == c1 && +          ifs->constants[reg][2] == c2 && +          ifs->constants[reg][3] == c3) { +         return UREG(REG_TYPE_CONST, reg); +      } +      else if (ifs->constant_flags[reg] == 0) { + +         ifs->constants[reg][0] = c0; +         ifs->constants[reg][1] = c1; +         ifs->constants[reg][2] = c2; +         ifs->constants[reg][3] = c3; +         ifs->constant_flags[reg] = 0xf; +         if (reg + 1 > ifs->num_constants) +            ifs->num_constants = reg + 1; +         return UREG(REG_TYPE_CONST, reg); +      } +   } + +   i915_program_error(p, "i915_emit_const4f: out of constants\n"); +   return 0; +} + + +uint 
+i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ +   return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c new file mode 100644 index 0000000000..d92bdc1bc6 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -0,0 +1,1190 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include <stdarg.h> + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "draw/draw_vertex.h" + + +/** + * Simple pass-through fragment shader to use when we don't have + * a real shader (or it fails to compile for some reason). + */ +static unsigned passthrough[] =  +{ +   _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), + +   /* declare input color: +    */ +   (D0_DCL |  +    (REG_TYPE_T << D0_TYPE_SHIFT) |  +    (T_DIFFUSE << D0_NR_SHIFT) |  +    D0_CHANNEL_ALL), +   0, +   0, + +   /* move to output color: +    */ +   (A0_MOV |  +    (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |  +    A0_DEST_CHANNEL_ALL |  +    (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | +    (T_DIFFUSE << A0_SRC0_NR_SHIFT)), +   0x01230000,			/* .xyzw */ +   0 +}; + + +/* 1, -1/3!, 1/5!, -1/7! */ +static const float sin_constants[4] = { 1.0, +   -1.0f / (3 * 2 * 1), +   1.0f / (5 * 4 * 3 * 2 * 1), +   -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -1/2!, 1/4!, -1/6! 
*/ +static const float cos_constants[4] = { 1.0, +   -1.0f / (2 * 1), +   1.0f / (4 * 3 * 2 * 1), +   -1.0f / (6 * 5 * 4 * 3 * 2 * 1) +}; + + + +/** + * component-wise negation of ureg + */ +static INLINE int +negate(int reg, int x, int y, int z, int w) +{ +   /* Another neat thing about the UREG representation */ +   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | +                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | +                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | +                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + +/** + * In the event of a translation failure, we'll generate a simple color + * pass-through program. + */ +static void +i915_use_passthrough_shader(struct i915_fragment_shader *fs) +{ +   fs->program = (uint *) MALLOC(sizeof(passthrough)); +   if (fs->program) { +      memcpy(fs->program, passthrough, sizeof(passthrough)); +      fs->program_len = Elements(passthrough); +   } +   fs->num_constants = 0; +} + + +void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...) +{ +   va_list args; +   char buffer[1024]; + +   debug_printf("i915_program_error: "); +   va_start( args, msg );   +   util_vsnprintf( buffer, sizeof(buffer), msg, args ); +   va_end( args ); +   debug_printf(buffer); +   debug_printf("\n"); + +   p->error = 1; +} + + + +/** + * Construct a ureg for the given source register.  Will emit + * constants, apply swizzling and negation as needed. + */ +static uint +src_vector(struct i915_fp_compile *p, +           const struct tgsi_full_src_register *source) +{ +   uint index = source->SrcRegister.Index; +   uint src = 0, sem_name, sem_ind; + +   switch (source->SrcRegister.File) { +   case TGSI_FILE_TEMPORARY: +      if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { +         i915_program_error(p, "Exceeded max temporary reg"); +         return 0; +      } +      src = UREG(REG_TYPE_R, index); +      break; +   case TGSI_FILE_INPUT: +      /* XXX: Packing COL1, FOGC into a single attribute works for +       * texenv programs, but will fail for real fragment programs +       * that use these attributes and expect them to be a full 4 +       * components wide.  Could use a texcoord to pass these +       * attributes if necessary, but that won't work in the general +       * case. +       *  +       * We also use a texture coordinate to pass wpos when possible. 
+       */ + +      sem_name = p->shader->info.input_semantic_name[index]; +      sem_ind = p->shader->info.input_semantic_index[index]; + +      switch (sem_name) { +      case TGSI_SEMANTIC_POSITION: +         debug_printf("SKIP SEM POS\n"); +         /* +         assert(p->wpos_tex != -1); +         src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); +         */ +         break; +      case TGSI_SEMANTIC_COLOR: +         if (sem_ind == 0) { +            src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); +         } +         else { +            /* secondary color */ +            assert(sem_ind == 1); +            src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); +            src = swizzle(src, X, Y, Z, ONE); +         } +         break; +      case TGSI_SEMANTIC_FOG: +         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); +         src = swizzle(src, W, W, W, W); +         break; +      case TGSI_SEMANTIC_GENERIC: +         /* usually a texcoord */ +         src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); +         break; +      default: +         i915_program_error(p, "Bad source->Index"); +         return 0; +      } +      break; + +   case TGSI_FILE_IMMEDIATE: +      assert(index < p->num_immediates); +      index = p->immediates_map[index]; +      /* fall-through */ +   case TGSI_FILE_CONSTANT: +      src = UREG(REG_TYPE_CONST, index); +      break; + +   default: +      i915_program_error(p, "Bad source->File"); +      return 0; +   } + +   if (source->SrcRegister.Extended) { +      src = swizzle(src, +                    source->SrcRegisterExtSwz.ExtSwizzleX, +                    source->SrcRegisterExtSwz.ExtSwizzleY, +                    source->SrcRegisterExtSwz.ExtSwizzleZ, +                    source->SrcRegisterExtSwz.ExtSwizzleW); +   } +   else { +      src = swizzle(src, +                    source->SrcRegister.SwizzleX, +                    source->SrcRegister.SwizzleY, +                    source->SrcRegister.SwizzleZ, +                    source->SrcRegister.SwizzleW); +   } + + +   /* There's both negate-all-components and per-component negation. +    * Try to handle both here. +    */ +   { +      int nx = source->SrcRegisterExtSwz.NegateX; +      int ny = source->SrcRegisterExtSwz.NegateY; +      int nz = source->SrcRegisterExtSwz.NegateZ; +      int nw = source->SrcRegisterExtSwz.NegateW; +      if (source->SrcRegister.Negate) { +         nx = !nx; +         ny = !ny; +         nz = !nz; +         nw = !nw; +      } +      src = negate(src, nx, ny, nz, nw); +   } + +   /* no abs() or post-abs negation */ +#if 0 +   /* XXX assertions disabled to allow arbfplight.c to run */ +   /* XXX enable these assertions, or fix things */ +   assert(!source->SrcRegisterExtMod.Absolute); +   assert(!source->SrcRegisterExtMod.Negate); +#endif +   return src; +} + + +/** + * Construct a ureg for a destination register. 
+ */ +static uint +get_result_vector(struct i915_fp_compile *p, +                  const struct tgsi_full_dst_register *dest) +{ +   switch (dest->DstRegister.File) { +   case TGSI_FILE_OUTPUT: +      { +         uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; +         switch (sem_name) { +         case TGSI_SEMANTIC_POSITION: +            return UREG(REG_TYPE_OD, 0); +         case TGSI_SEMANTIC_COLOR: +            return UREG(REG_TYPE_OC, 0); +         default: +            i915_program_error(p, "Bad inst->DstReg.Index/semantics"); +            return 0; +         } +      } +   case TGSI_FILE_TEMPORARY: +      return UREG(REG_TYPE_R, dest->DstRegister.Index); +   default: +      i915_program_error(p, "Bad inst->DstReg.File"); +      return 0; +   } +} + + +/** + * Compute flags for saturation and writemask. + */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ +   const uint writeMask +      = inst->FullDstRegisters[0].DstRegister.WriteMask; +   uint flags = 0x0; + +   if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) +      flags |= A0_DEST_SATURATE; + +   if (writeMask & TGSI_WRITEMASK_X) +      flags |= A0_DEST_CHANNEL_X; +   if (writeMask & TGSI_WRITEMASK_Y) +      flags |= A0_DEST_CHANNEL_Y; +   if (writeMask & TGSI_WRITEMASK_Z) +      flags |= A0_DEST_CHANNEL_Z; +   if (writeMask & TGSI_WRITEMASK_W) +      flags |= A0_DEST_CHANNEL_W; + +   return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ +   switch (tex) { +   case TGSI_TEXTURE_1D: +      return D0_SAMPLE_TYPE_2D; +   case TGSI_TEXTURE_2D: +      return D0_SAMPLE_TYPE_2D; +   case TGSI_TEXTURE_RECT: +      return D0_SAMPLE_TYPE_2D; +   case TGSI_TEXTURE_3D: +      return D0_SAMPLE_TYPE_VOLUME; +   case TGSI_TEXTURE_CUBE: +      return D0_SAMPLE_TYPE_CUBE; +   default: +      i915_program_error(p, "TexSrc type"); +      return 0; +   } +} + + +/** + * Generate texel lookup instruction. + */ +static void +emit_tex(struct i915_fp_compile *p, +         const struct tgsi_full_instruction *inst, +         uint opcode) +{ +   uint texture = inst->InstructionExtTexture.Texture; +   uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; +   uint tex = translate_tex_src_target( p, texture ); +   uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); +   uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + +   i915_emit_texld( p, +                    get_result_vector( p, &inst->FullDstRegisters[0] ), +                    get_result_flags( inst ), +                    sampler, +                    coord, +                    opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode  the i915 opcode + * \param numArgs  the number of input/src arguments + */ +static void +emit_simple_arith(struct i915_fp_compile *p, +                  const struct tgsi_full_instruction *inst, +                  uint opcode, uint numArgs) +{ +   uint arg1, arg2, arg3; + +   assert(numArgs <= 3); + +   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); +   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); +   arg3 = (numArgs < 3) ? 
0 : src_vector( p, &inst->FullSrcRegisters[2] ); + +   i915_emit_arith( p, +                    opcode, +                    get_result_vector( p, &inst->FullDstRegisters[0]), +                    get_result_flags( inst ), 0, +                    arg1, +                    arg2, +                    arg3 ); +} + + +/** As above, but swap the first two src regs */ +static void +emit_simple_arith_swap2(struct i915_fp_compile *p, +                        const struct tgsi_full_instruction *inst, +                        uint opcode, uint numArgs) +{ +   struct tgsi_full_instruction inst2; + +   assert(numArgs == 2); + +   /* transpose first two registers */ +   inst2 = *inst; +   inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; +   inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + +   emit_simple_arith(p, &inst2, opcode, numArgs); +} + + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +/* + * Translate TGSI instruction to i915 instruction. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? + * LIT      -- results seem a little different to sw mesa + * LOG      -- different to mesa on negative numbers, but this is conformant. + */  +static void +i915_translate_instruction(struct i915_fp_compile *p, +                           const struct tgsi_full_instruction *inst) +{ +   uint writemask; +   uint src0, src1, src2, flags; +   uint tmp = 0; + +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_ABS: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      i915_emit_arith(p, +                      A0_MAX, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      src0, negate(src0, 1, 1, 1, 1), 0); +      break; + +   case TGSI_OPCODE_ADD: +      emit_simple_arith(p, inst, A0_ADD, 2); +      break; + +   case TGSI_OPCODE_CMP: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      src2 = src_vector(p, &inst->FullSrcRegisters[2]); +      i915_emit_arith(p, A0_CMP,  +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst),  +                      0, src0, src2, src1);   /* NOTE: order of src2, src1 */ +      break; + +   case TGSI_OPCODE_COS: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + +      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + +      /* By choosing different taylor constants, could get rid of this mul: +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + +      /*  +       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1 +       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 +       * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1 +       * result = DP4 t0, cos_constants +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XY, 0, +                      swizzle(tmp, X, X, ONE, ONE), +                      swizzle(tmp, X, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XYZ, 0, +            
          swizzle(tmp, X, Y, X, ONE), +                      swizzle(tmp, X, X, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XYZ, 0, +                      swizzle(tmp, X, X, Z, ONE), +                      swizzle(tmp, Z, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_DP4, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(tmp, ONE, Z, Y, X), +                      i915_emit_const4fv(p, cos_constants), 0); +      break; + +   case TGSI_OPCODE_DP3: +      emit_simple_arith(p, inst, A0_DP3, 2); +      break; + +   case TGSI_OPCODE_DP4: +      emit_simple_arith(p, inst, A0_DP4, 2); +      break; + +   case TGSI_OPCODE_DPH: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); + +      i915_emit_arith(p, +                      A0_DP4, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, X, Y, Z, ONE), src1, 0); +      break; + +   case TGSI_OPCODE_DST: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); + +      /* result[0] = 1    * 1; +       * result[1] = a[1] * b[1]; +       * result[2] = a[2] * 1; +       * result[3] = 1    * b[3]; +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, ONE, Y, Z, ONE), +                      swizzle(src1, ONE, Y, ONE, W), 0); +      break; + +   case TGSI_OPCODE_END: +      /* no-op */ +      break; + +   case TGSI_OPCODE_EX2: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_EXP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_FLR: +      emit_simple_arith(p, inst, A0_FLR, 1); +      break; + +   case TGSI_OPCODE_FRC: +      emit_simple_arith(p, inst, A0_FRC, 1); +      break; + +   case TGSI_OPCODE_KIL: +      /* kill if src[0].x < 0 || src[0].y < 0 ... 
*/ +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      i915_emit_texld(p, +                      tmp,                   /* dest reg: a dummy reg */ +                      A0_DEST_CHANNEL_ALL,   /* dest writemask */ +                      0,                     /* sampler */ +                      src0,                  /* coord*/ +                      T0_TEXKILL);           /* opcode */ +      break; + +   case TGSI_OPCODE_KILP: +      assert(0); /* not tested yet */ +      break; + +   case TGSI_OPCODE_LG2: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_LOG, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_LIT: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      /* tmp = max( a.xyzw, a.00zw ) +       * XXX: Clamp tmp.w to -128..128 +       * tmp.y = log(tmp.y) +       * tmp.y = tmp.w * tmp.y +       * tmp.y = exp(tmp.y) +       * result = cmp (a.11-x1, a.1x01, a.1xy1 ) +       */ +      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, +                      src0, swizzle(src0, ZERO, ZERO, Z, W), 0); + +      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, +                      swizzle(tmp, Y, Y, Y, Y), 0, 0); + +      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, +                      swizzle(tmp, ZERO, Y, ZERO, ZERO), +                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0); + +      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, +                      swizzle(tmp, Y, Y, Y, Y), 0, 0); + +      i915_emit_arith(p, A0_CMP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), +                      swizzle(tmp, ONE, X, ZERO, ONE), +                      swizzle(tmp, ONE, X, Y, ONE)); + +      break; + +   case TGSI_OPCODE_LRP: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      src2 = src_vector(p, &inst->FullSrcRegisters[2]); +      flags = get_result_flags(inst); +      tmp = i915_get_utemp(p); + +      /* b*a + c*(1-a) +       * +       * b*a + c - ca  +       * +       * tmp = b*a + c,  +       * result = (-c)*a + tmp  +       */ +      i915_emit_arith(p, A0_MAD, tmp, +                      flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); + +      i915_emit_arith(p, A0_MAD, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); +      break; + +   case TGSI_OPCODE_MAD: +      emit_simple_arith(p, inst, A0_MAD, 3); +      break; + +   case TGSI_OPCODE_MAX: +      emit_simple_arith(p, inst, A0_MAX, 2); +      break; + +   case TGSI_OPCODE_MIN: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      tmp = i915_get_utemp(p); +      flags = get_result_flags(inst); + +      i915_emit_arith(p, +                      A0_MAX, +                      tmp, flags & A0_DEST_CHANNEL_ALL, 0, +                      negate(src0, 1, 1, 1, 1), +                      negate(src1, 1, 1, 1, 1), 0); + +      i915_emit_arith(p, +                      A0_MOV, +                      
get_result_vector(p, &inst->FullDstRegisters[0]), +                      flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); +      break; + +   case TGSI_OPCODE_MOV: +   case TGSI_OPCODE_SWZ: +      emit_simple_arith(p, inst, A0_MOV, 1); +      break; + +   case TGSI_OPCODE_MUL: +      emit_simple_arith(p, inst, A0_MUL, 2); +      break; + +   case TGSI_OPCODE_POW: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      tmp = i915_get_utemp(p); +      flags = get_result_flags(inst); + +      /* XXX: masking on intermediate values, here and elsewhere. +       */ +      i915_emit_arith(p, +                      A0_LOG, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      swizzle(src0, X, X, X, X), 0, 0); + +      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); + +      i915_emit_arith(p, +                      A0_EXP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      flags, 0, swizzle(tmp, X, X, X, X), 0, 0); +      break; +       +   case TGSI_OPCODE_RET: +      /* XXX: no-op? */ +      break; +       +   case TGSI_OPCODE_RCP: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_RCP, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                         get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_RSQ: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); + +      i915_emit_arith(p, +                      A0_RSQ, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, X, X, X, X), 0, 0); +      break; + +   case TGSI_OPCODE_SCS: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      /*  +       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1 +       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x +       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x +       * scs.x = DP4 t1, sin_constants +       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1 +       * scs.y = DP4 t1, cos_constants +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XY, 0, +                      swizzle(src0, X, X, ONE, ONE), +                      swizzle(src0, X, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(tmp, X, Y, X, Y), +                      swizzle(tmp, X, X, ONE, ONE), 0); + +      writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; + +      if (writemask & TGSI_WRITEMASK_Y) { +         uint tmp1; + +         if (writemask & TGSI_WRITEMASK_X) +            tmp1 = i915_get_utemp(p); +         else +            tmp1 = tmp; + +         i915_emit_arith(p, +                         A0_MUL, +                         tmp1, A0_DEST_CHANNEL_ALL, 0, +                         swizzle(tmp, X, Y, Y, W), +                         swizzle(tmp, X, Z, ONE, ONE), 0); + +         i915_emit_arith(p, +                         A0_DP4, +                         get_result_vector(p, &inst->FullDstRegisters[0]), +                         A0_DEST_CHANNEL_Y, 0, +                         swizzle(tmp1, W, Z, Y, X), +                         i915_emit_const4fv(p, sin_constants), 0); +      } + +      if (writemask & 
TGSI_WRITEMASK_X) { +         i915_emit_arith(p, +                         A0_MUL, +                         tmp, A0_DEST_CHANNEL_XYZ, 0, +                         swizzle(tmp, X, X, Z, ONE), +                         swizzle(tmp, Z, ONE, ONE, ONE), 0); + +         i915_emit_arith(p, +                         A0_DP4, +                         get_result_vector(p, &inst->FullDstRegisters[0]), +                         A0_DEST_CHANNEL_X, 0, +                         swizzle(tmp, ONE, Z, Y, X), +                         i915_emit_const4fv(p, cos_constants), 0); +      } +      break; + +   case TGSI_OPCODE_SGE: +      emit_simple_arith(p, inst, A0_SGE, 2); +      break; + +   case TGSI_OPCODE_SLE: +      /* like SGE, but swap reg0, reg1 */ +      emit_simple_arith_swap2(p, inst, A0_SGE, 2); +      break; + +   case TGSI_OPCODE_SIN: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      tmp = i915_get_utemp(p); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); + +      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); + +      /* By choosing different taylor constants, could get rid of this mul: +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_X, 0, +                      tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0); + +      /*  +       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1 +       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x +       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x +       * result = DP4 t1.wzyx, sin_constants +       */ +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_XY, 0, +                      swizzle(tmp, X, X, ONE, ONE), +                      swizzle(tmp, X, ONE, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(tmp, X, Y, X, Y), +                      swizzle(tmp, X, X, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(tmp, X, Y, Y, W), +                      swizzle(tmp, X, Z, ONE, ONE), 0); + +      i915_emit_arith(p, +                      A0_DP4, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(tmp, W, Z, Y, X), +                      i915_emit_const4fv(p, sin_constants), 0); +      break; + +   case TGSI_OPCODE_SLT: +      emit_simple_arith(p, inst, A0_SLT, 2); +      break; + +   case TGSI_OPCODE_SGT: +      /* like SLT, but swap reg0, reg1 */ +      emit_simple_arith_swap2(p, inst, A0_SLT, 2); +      break; + +   case TGSI_OPCODE_SUB: +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); + +      i915_emit_arith(p, +                      A0_ADD, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      src0, negate(src1, 1, 1, 1, 1), 0); +      break; + +   case TGSI_OPCODE_TEX: +      emit_tex(p, inst, T0_TEXLD); +      break; + +   case TGSI_OPCODE_TXB: +      emit_tex(p, inst, T0_TEXLDB); +      break; + +   case TGSI_OPCODE_TXP: +      emit_tex(p, inst, T0_TEXLDP); +      break; + +   case TGSI_OPCODE_XPD: + 
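The XPD case below computes the cross product as one MUL followed by one MAD whose third operand is negated, instead of three explicit subtractions. A minimal scalar sketch of the same decomposition, using a hypothetical float3 type and mirroring the swizzles the emitted instructions use (illustration only, not part of the patch):

/* Illustrative sketch only; float3 is a hypothetical helper type. */
typedef struct { float x, y, z; } float3;

static float3
cross_via_mul_mad(float3 a, float3 b)
{
   float3 tmp, result;

   /* MUL: tmp = a.zxy * b.yzx */
   tmp.x = a.z * b.y;
   tmp.y = a.x * b.z;
   tmp.z = a.y * b.x;

   /* MAD: result = a.yzx * b.zxy + (-tmp) */
   result.x = a.y * b.z - tmp.x;   /* = a.y*b.z - a.z*b.y */
   result.y = a.z * b.x - tmp.y;   /* = a.z*b.x - a.x*b.z */
   result.z = a.x * b.y - tmp.z;   /* = a.x*b.y - a.y*b.x */
   return result;
}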
     /* Cross product: +       *      result.x = src0.y * src1.z - src0.z * src1.y; +       *      result.y = src0.z * src1.x - src0.x * src1.z; +       *      result.z = src0.x * src1.y - src0.y * src1.x; +       *      result.w = undef; +       */ +      src0 = src_vector(p, &inst->FullSrcRegisters[0]); +      src1 = src_vector(p, &inst->FullSrcRegisters[1]); +      tmp = i915_get_utemp(p); + +      i915_emit_arith(p, +                      A0_MUL, +                      tmp, A0_DEST_CHANNEL_ALL, 0, +                      swizzle(src0, Z, X, Y, ONE), +                      swizzle(src1, Y, Z, X, ONE), 0); + +      i915_emit_arith(p, +                      A0_MAD, +                      get_result_vector(p, &inst->FullDstRegisters[0]), +                      get_result_flags(inst), 0, +                      swizzle(src0, Y, Z, X, ONE), +                      swizzle(src1, Z, X, Y, ONE), +                      negate(tmp, 1, 1, 1, 0)); +      break; + +   default: +      i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); +      p->error = 1; +      return; +   } + +   i915_release_utemps(p); +} + + +/** + * Translate TGSI fragment shader into i915 hardware instructions. + * \param p  the translation state + * \param tokens  the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, +                            const struct tgsi_token *tokens) +{ +   struct i915_fragment_shader *ifs = p->shader; +   struct tgsi_parse_context parse; + +   tgsi_parse_init( &parse, tokens ); + +   while( !tgsi_parse_end_of_tokens( &parse ) ) { + +      tgsi_parse_token( &parse ); + +      switch( parse.FullToken.Token.Type ) { +      case TGSI_TOKEN_TYPE_DECLARATION: +         if (parse.FullToken.FullDeclaration.Declaration.File +                  == TGSI_FILE_CONSTANT) { +            uint i; +            for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; +                 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; +                 i++) { +               assert(ifs->constant_flags[i] == 0x0); +               ifs->constant_flags[i] = I915_CONSTFLAG_USER; +               ifs->num_constants = MAX2(ifs->num_constants, i + 1); +            } +         } +         else if (parse.FullToken.FullDeclaration.Declaration.File +                  == TGSI_FILE_TEMPORARY) { +            uint i; +            for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; +                 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; +                 i++) { +               assert(i < I915_MAX_TEMPORARY); +               /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ +               p->temp_flag |= (1 << i); /* mark temp as used */ +            } +         } +         break; + +      case TGSI_TOKEN_TYPE_IMMEDIATE: +         { +            const struct tgsi_full_immediate *imm +               = &parse.FullToken.FullImmediate; +            const uint pos = p->num_immediates++; +            uint j; +            for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { +               p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; +            } +         } +         break; + +      case TGSI_TOKEN_TYPE_INSTRUCTION: +         if (p->first_instruction) { +            /* resolve location of immediates */ +            uint i, j; +            for (i = 0; i < p->num_immediates; i++) { +               /* find constant slot for this immediate */ +               for (j = 0; j < I915_MAX_CONSTANT; j++) { +               
   if (ifs->constant_flags[j] == 0x0) { +                     memcpy(ifs->constants[j], +                            p->immediates[i], +                            4 * sizeof(float)); +                     /*printf("immediate %d maps to const %d\n", i, j);*/ +                     ifs->constant_flags[j] = 0xf;  /* all four comps used */ +                     p->immediates_map[i] = j; +                     ifs->num_constants = MAX2(ifs->num_constants, j + 1); +                     break; +                  } +               } +            } + +            p->first_instruction = FALSE; +         } + +         i915_translate_instruction(p, &parse.FullToken.FullInstruction); +         break; + +      default: +         assert( 0 ); +      } + +   } /* while */ + +   tgsi_parse_free (&parse); +} + + +static struct i915_fp_compile * +i915_init_compile(struct i915_context *i915, +                  struct i915_fragment_shader *ifs) +{ +   struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + +   p->shader = ifs; + +   /* Put new constants at end of const buffer, growing downward. +    * The problem is we don't know how many user-defined constants might +    * be specified with pipe->set_constant_buffer(). +    * Should pre-scan the user's program to determine the highest-numbered +    * constant referenced. +    */ +   ifs->num_constants = 0; +   memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + +   p->first_instruction = TRUE; + +   p->nr_tex_indirect = 1;      /* correct? */ +   p->nr_tex_insn = 0; +   p->nr_alu_insn = 0; +   p->nr_decl_insn = 0; + +   p->csr = p->program; +   p->decl = p->declarations; +   p->decl_s = 0; +   p->decl_t = 0; +   p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; +   p->utemp_flag = ~0x7; + +   p->wpos_tex = -1; + +   /* initialize the first program word */ +   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; + +   return p; +} + + +/* Copy compile results to the fragment program struct and destroy the + * compilation context. 
+ */ +static void +i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) +{ +   struct i915_fragment_shader *ifs = p->shader; +   unsigned long program_size = (unsigned long) (p->csr - p->program); +   unsigned long decl_size = (unsigned long) (p->decl - p->declarations); + +   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) +      i915_program_error(p, "Exceeded max nr indirect texture lookups"); + +   if (p->nr_tex_insn > I915_MAX_TEX_INSN) +      i915_program_error(p, "Exceeded max TEX instructions"); + +   if (p->nr_alu_insn > I915_MAX_ALU_INSN) +      i915_program_error(p, "Exceeded max ALU instructions"); + +   if (p->nr_decl_insn > I915_MAX_DECL_INSN) +      i915_program_error(p, "Exceeded max DECL instructions"); + +   if (p->error) { +      p->NumNativeInstructions = 0; +      p->NumNativeAluInstructions = 0; +      p->NumNativeTexInstructions = 0; +      p->NumNativeTexIndirections = 0; + +      i915_use_passthrough_shader(ifs); +   } +   else { +      p->NumNativeInstructions +         = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; +      p->NumNativeAluInstructions = p->nr_alu_insn; +      p->NumNativeTexInstructions = p->nr_tex_insn; +      p->NumNativeTexIndirections = p->nr_tex_indirect; + +      /* patch in the program length */ +      p->declarations[0] |= program_size + decl_size - 2; + +      /* Copy compilation results to fragment program struct:  +       */ +      assert(!ifs->program); +      ifs->program +         = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); +      if (ifs->program) { +         ifs->program_len = program_size + decl_size; + +         memcpy(ifs->program, +                p->declarations,  +                decl_size * sizeof(uint)); + +         memcpy(ifs->program + decl_size,  +                p->program,  +                program_size * sizeof(uint)); +      } +   } + +   /* Release the compilation struct:  +    */ +   FREE(p); +} + + +/** + * Find an unused texture coordinate slot to use for fragment WPOS. + * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). + */ +static void +i915_find_wpos_space(struct i915_fp_compile *p) +{ +#if 0 +   const uint inputs +      = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ +   uint i; + +   p->wpos_tex = -1; + +   if (inputs & (1 << TGSI_ATTRIB_POS)) { +      for (i = 0; i < I915_TEX_UNITS; i++) { +	 if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { +	    p->wpos_tex = i; +	    return; +	 } +      } + +      i915_program_error(p, "No free texcoord for wpos value"); +   } +#else +   if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { +      /* frag shader using the fragment position input */ +#if 0 +      assert(0); +#endif +   } +#endif +} + + + + +/** + * Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +static void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ +   /* XXX assuming pos/depth is always in output[0] */ +   if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { +      const uint depth = UREG(REG_TYPE_OD, 0); + +      i915_emit_arith(p, +                      A0_MOV,                     /* opcode */ +                      depth,                      /* dest reg */ +                      A0_DEST_CHANNEL_W,          /* write mask */ +                      0,                          /* saturate? 
*/ +                      swizzle(depth, X, Y, Z, Z), /* src0 */ +                      0, 0 /* src1, src2 */); +   } +} + + +void +i915_translate_fragment_program( struct i915_context *i915, +                                 struct i915_fragment_shader *fs) +{ +   struct i915_fp_compile *p = i915_init_compile(i915, fs); +   const struct tgsi_token *tokens = fs->state.tokens; + +   i915_find_wpos_space(p); + +#if 0 +   tgsi_dump(tokens, 0); +#endif + +   i915_translate_instructions(p, tokens); +   i915_fixup_depth_write(p); + +   i915_fini_compile(i915, p); +} diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c new file mode 100644 index 0000000000..8f1f58b2dd --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -0,0 +1,220 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "draw/draw_pipe.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_batch.h" + + + +/** + * Primitive emit to hardware.  No support for vertex buffers or any + * nice fast paths. + */ +struct setup_stage { +   struct draw_stage stage; /**< This must be first (base class) */ + +   struct i915_context *i915;    +}; + + + +/** + * Basically a cast wrapper. + */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ +   return (struct setup_stage *)stage; +} + + +/** + * Extract the needed fields from vertex_header and emit i915 dwords. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. 
+ */ +static INLINE void +emit_hw_vertex( struct i915_context *i915, +                const struct vertex_header *vertex) +{ +   const struct vertex_info *vinfo = &i915->current.vertex_info; +   uint i; +   uint count = 0;  /* for debug/sanity */ + +   assert(!i915->dirty); + +   for (i = 0; i < vinfo->num_attribs; i++) { +      const uint j = vinfo->attrib[i].src_index; +      const float *attrib = vertex->data[j]; +      switch (vinfo->attrib[i].emit) { +      case EMIT_1F: +         OUT_BATCH( fui(attrib[0]) ); +         count++; +         break; +      case EMIT_2F: +         OUT_BATCH( fui(attrib[0]) ); +         OUT_BATCH( fui(attrib[1]) ); +         count += 2; +         break; +      case EMIT_3F: +         OUT_BATCH( fui(attrib[0]) ); +         OUT_BATCH( fui(attrib[1]) ); +         OUT_BATCH( fui(attrib[2]) ); +         count += 3; +         break; +      case EMIT_4F: +         OUT_BATCH( fui(attrib[0]) ); +         OUT_BATCH( fui(attrib[1]) ); +         OUT_BATCH( fui(attrib[2]) ); +         OUT_BATCH( fui(attrib[3]) ); +         count += 4; +         break; +      case EMIT_4UB: +         OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), +                             float_to_ubyte( attrib[1] ), +                             float_to_ubyte( attrib[0] ), +                             float_to_ubyte( attrib[3] )) ); +         count += 1; +         break; +      default: +         assert(0); +      } +   } +   assert(count == vinfo->size); +} + + + +static INLINE void  +emit_prim( struct draw_stage *stage,  +	   struct prim_header *prim, +	   unsigned hwprim, +	   unsigned nr ) +{ +   struct i915_context *i915 = setup_stage(stage)->i915; +   unsigned vertex_size; +   unsigned i; + +   if (i915->dirty) +      i915_update_derived( i915 ); + +   if (i915->hardware_dirty) +      i915_emit_hardware_state( i915 ); + +   /* need to do this after validation! */ +   vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ +   assert(vertex_size >= 12); /* never smaller than 12 bytes */ + +   if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { +      FLUSH_BATCH(NULL); + +      /* Make sure state is re-emitted after a flush:  +       */ +      i915_update_derived( i915 ); +      i915_emit_hardware_state( i915 ); + +      if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { +	 assert(0); +	 return; +      } +   } + +   /* Emit each triangle as a single primitive.  I told you this was +    * simple. +    */ +   OUT_BATCH(_3DPRIMITIVE |  +	     hwprim | +	     ((4 + vertex_size * nr)/4 - 2)); + +   for (i = 0; i < nr; i++) +      emit_hw_vertex(i915, prim->v[i]); +} + + +static void  +setup_tri( struct draw_stage *stage, struct prim_header *prim ) +{ +   emit_prim( stage, prim, PRIM3D_TRILIST, 3 ); +} + + +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ +   emit_prim( stage, prim, PRIM3D_LINELIST, 2 ); +} + + +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ +   emit_prim( stage, prim, PRIM3D_POINTLIST, 1 ); +} + + +static void setup_flush( struct draw_stage *stage, unsigned flags ) +{ +} + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + +static void render_destroy( struct draw_stage *stage ) +{ +   FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage.  This gets plugged into + * the 'draw' module's pipeline. 
+ */
+struct draw_stage *i915_draw_render_stage( struct i915_context *i915 )
+{
+   struct setup_stage *setup = CALLOC_STRUCT(setup_stage);
+
+   setup->i915 = i915;
+   setup->stage.draw = i915->draw;
+   setup->stage.point = setup_point;
+   setup->stage.line = setup_line;
+   setup->stage.tri = setup_tri;
+   setup->stage.flush = setup_flush;
+   setup->stage.reset_stipple_counter = reset_stipple_counter;
+   setup->stage.destroy = render_destroy;
+
+   return &setup->stage;
+}
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
new file mode 100644
index 0000000000..f49f6d6ed1
--- /dev/null
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -0,0 +1,545 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress 
+ * 
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_winsys.h"
+#include "i915_batch.h"
+#include "i915_state.h"
+
+
+/**
+ * Primitive renderer for i915.
+ */
+struct i915_vbuf_render {
+   struct vbuf_render base;
+
+   struct i915_context *i915;
+
+   /** Vertex size in bytes */
+   unsigned vertex_size;
+
+   /** Software primitive */
+   unsigned prim;
+
+   /** Hardware primitive */
+   unsigned hwprim;
+
+   /** Generate a vertex list */
+   unsigned fallback;
+
+   /* Stuff for the vbo */
+   struct pipe_buffer *vbo;
+   size_t vbo_size;
+   size_t vbo_offset;
+   void *vbo_ptr;
+   size_t vbo_alloc_size;
+};
+
+
+/**
+ * Basically a cast wrapper.
+ */ +static INLINE struct i915_vbuf_render * +i915_vbuf_render( struct vbuf_render *render ) +{ +   assert(render); +   return (struct i915_vbuf_render *)render; +} + + +static const struct vertex_info * +i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   struct i915_context *i915 = i915_render->i915; + +   if (i915->dirty) { +      /* make sure we have up to date vertex layout */ +      i915_update_derived( i915 ); +   } + +   return &i915->current.vertex_info; +} + + +static void * +i915_vbuf_render_allocate_vertices( struct vbuf_render *render, +                                    ushort vertex_size, +                                    ushort nr_vertices ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   struct i915_context *i915 = i915_render->i915; +   struct pipe_screen *screen = i915->pipe.screen; +   size_t size = (size_t)vertex_size * (size_t)nr_vertices; + +   /* FIXME: handle failure */ +   assert(!i915->vbo); + +   if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { +   } else { +      i915->vbo_flushed = 0; +      pipe_buffer_reference(screen, &i915_render->vbo, NULL); +   } + +   if (!i915_render->vbo) { +      i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); +      i915_render->vbo_offset = 0; +      i915_render->vbo = pipe_buffer_create(screen, +                                            64, +                                            I915_BUFFER_USAGE_LIT_VERTEX, +                                            i915_render->vbo_size); +      i915_render->vbo_ptr = pipe_buffer_map(screen, +                                             i915_render->vbo, +                                             PIPE_BUFFER_USAGE_CPU_WRITE); +      pipe_buffer_unmap(screen, i915_render->vbo); +   } + +   i915->vbo = i915_render->vbo; +   i915->vbo_offset = i915_render->vbo_offset; +   i915->dirty |= I915_NEW_VBO; + +   return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; +} + + +static boolean +i915_vbuf_render_set_primitive( struct vbuf_render *render,  +                                unsigned prim ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   i915_render->prim = prim; + +   switch(prim) { +   case PIPE_PRIM_POINTS: +      i915_render->hwprim = PRIM3D_POINTLIST; +      i915_render->fallback = 0; +      return TRUE; +   case PIPE_PRIM_LINES: +      i915_render->hwprim = PRIM3D_LINELIST; +      i915_render->fallback = 0; +      return TRUE; +   case PIPE_PRIM_LINE_LOOP: +      i915_render->hwprim = PRIM3D_LINELIST; +      i915_render->fallback = PIPE_PRIM_LINE_LOOP; +      return TRUE; +   case PIPE_PRIM_LINE_STRIP: +      i915_render->hwprim = PRIM3D_LINESTRIP; +      i915_render->fallback = 0; +      return TRUE; +   case PIPE_PRIM_TRIANGLES: +      i915_render->hwprim = PRIM3D_TRILIST; +      i915_render->fallback = 0; +      return TRUE; +   case PIPE_PRIM_TRIANGLE_STRIP: +      i915_render->hwprim = PRIM3D_TRISTRIP; +      i915_render->fallback = 0; +      return TRUE; +   case PIPE_PRIM_TRIANGLE_FAN: +      i915_render->hwprim = PRIM3D_TRIFAN; +      i915_render->fallback = 0; +      return TRUE; +   case PIPE_PRIM_QUADS: +      i915_render->hwprim = PRIM3D_TRILIST; +      i915_render->fallback = PIPE_PRIM_QUADS; +      return TRUE; +   case PIPE_PRIM_QUAD_STRIP: +      i915_render->hwprim = PRIM3D_TRILIST; +      i915_render->fallback = PIPE_PRIM_QUAD_STRIP; +      return TRUE; + 
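The cases above that set a nonzero fallback (line loops, quads, quad strips) are not submitted natively; the routines further down (draw_arrays_generate_indices, draw_generate_indices) expand them into an index stream the hardware accepts, packing two 16-bit indices per dword. A minimal sketch of the quad-list expansion on a plain index array, using the same vertex ordering as the PIPE_PRIM_QUADS case below (hypothetical helper, illustration only, not part of the patch):

/* Illustrative sketch only: expand a quad list into triangle-list indices.
 * Each quad (v0, v1, v2, v3) becomes the triangles (v0, v1, v3) and
 * (v1, v2, v3), the same ordering draw_generate_indices() emits below. */
static unsigned
expand_quads_to_tris(const unsigned short *quad_elts, unsigned nr_elts,
                     unsigned short *tri_elts)
{
   unsigned i, n = 0;

   for (i = 0; i + 3 < nr_elts; i += 4) {
      tri_elts[n++] = quad_elts[i + 0];
      tri_elts[n++] = quad_elts[i + 1];
      tri_elts[n++] = quad_elts[i + 3];
      tri_elts[n++] = quad_elts[i + 1];
      tri_elts[n++] = quad_elts[i + 2];
      tri_elts[n++] = quad_elts[i + 3];
   }
   return n;   /* equals (nr_elts / 4) * 6, as in draw_calc_nr_indices() */
}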
  case PIPE_PRIM_POLYGON: +      i915_render->hwprim = PRIM3D_POLY; +      i915_render->fallback = 0; +      return TRUE; +   default: +      /* FIXME: Actually, can handle a lot more just fine... */ +      return FALSE; +   } +} + + + +/** + * Used for fallbacks in draw_arrays + */ +static void +draw_arrays_generate_indices( struct vbuf_render *render, +                              unsigned start, uint nr, +                              unsigned type ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   struct i915_context *i915 = i915_render->i915; +   unsigned i; +   unsigned end = start + nr; +   switch(type) { +   case 0: +      for (i = start; i+1 < end; i += 2) +	 OUT_BATCH( (i+0) | (i+1) << 16 ); +      if (i < end) +	 OUT_BATCH( i ); +      break; +   case PIPE_PRIM_LINE_LOOP: +      if (nr >= 2) { +	 for (i = start + 1; i < end; i++) +	    OUT_BATCH( (i-0) | (i+0) << 16 ); +	 OUT_BATCH( (i-0) | (  start) << 16 ); +      } +      break; +   case PIPE_PRIM_QUADS: +      for (i = start; i + 3 < end; i += 4) { +	 OUT_BATCH( (i+0) | (i+1) << 16 ); +	 OUT_BATCH( (i+3) | (i+1) << 16 ); +	 OUT_BATCH( (i+2) | (i+3) << 16 ); +      } +      break; +   case PIPE_PRIM_QUAD_STRIP: +      for (i = start; i + 3 < end; i += 2) { +	 OUT_BATCH( (i+0) | (i+1) << 16 ); +	 OUT_BATCH( (i+3) | (i+2) << 16 ); +	 OUT_BATCH( (i+0) | (i+3) << 16 ); +      } +      break; +   default: +      assert(0); +   } +} + +static unsigned +draw_arrays_calc_nr_indices( uint nr, unsigned type ) +{ +   switch (type) { +   case 0: +      return nr; +   case PIPE_PRIM_LINE_LOOP: +      if (nr >= 2) +	 return nr * 2; +      else +	 return 0; +   case PIPE_PRIM_QUADS: +      return (nr / 4) * 6; +   case PIPE_PRIM_QUAD_STRIP: +      return ((nr - 2) / 2) * 6; +   default: +      assert(0); +      return 0; +   } +} + +static void +draw_arrays_fallback( struct vbuf_render *render, +                      unsigned start, +                      uint nr ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   struct i915_context *i915 = i915_render->i915; +   unsigned nr_indices; + +   if (i915->dirty) +      i915_update_derived( i915 ); + +   if (i915->hardware_dirty) +      i915_emit_hardware_state( i915 ); + +   nr_indices = draw_arrays_calc_nr_indices( nr, i915_render->fallback ); +   if (!nr_indices) +      return; + +   if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { +      FLUSH_BATCH(NULL); + +      /* Make sure state is re-emitted after a flush: +       */ +      i915_update_derived( i915 ); +      i915_emit_hardware_state( i915 ); +      i915->vbo_flushed = 1; + +      if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { +	 assert(0); +	 goto out; +      } +   } +   OUT_BATCH( _3DPRIMITIVE | +	      PRIM_INDIRECT | +	      i915_render->hwprim | +	      PRIM_INDIRECT_ELTS | +	      nr_indices ); + +   draw_arrays_generate_indices( render, start, nr, i915_render->fallback ); + +out: +   return; +} + +static void +i915_vbuf_render_draw_arrays( struct vbuf_render *render, +                              unsigned start, +                              uint nr ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + +   if (i915_render->fallback) { +      draw_arrays_fallback( render, start, nr ); +      return; +   } + +   /* JB: TODO submit direct cmds */ +   draw_arrays_fallback( render, start, nr ); +} + +/** + * Used for normal and fallback emitting of indices + * If type is zero normal operation assumed. 
+ */ +static void +draw_generate_indices( struct vbuf_render *render, +                       const ushort *indices, +                       uint nr_indices, +                       unsigned type ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   struct i915_context *i915 = i915_render->i915; +   unsigned i; + +   switch(type) { +   case 0: +      for (i = 0; i + 1 < nr_indices; i += 2) { +	 OUT_BATCH( indices[i] | indices[i+1] << 16 ); +      } +      if (i < nr_indices) { +	 OUT_BATCH( indices[i] ); +      } +      break; +   case PIPE_PRIM_LINE_LOOP: +      if (nr_indices >= 2) { +	 for (i = 1; i < nr_indices; i++) +	    OUT_BATCH( indices[i-1] | indices[i] << 16 ); +	 OUT_BATCH( indices[i-1] | indices[0] << 16 ); +      } +      break; +   case PIPE_PRIM_QUADS: +      for (i = 0; i + 3 < nr_indices; i += 4) { +	 OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); +	 OUT_BATCH( indices[i+3] | indices[i+1] << 16 ); +	 OUT_BATCH( indices[i+2] | indices[i+3] << 16 ); +      } +      break; +   case PIPE_PRIM_QUAD_STRIP: +      for (i = 0; i + 3 < nr_indices; i += 2) { +	 OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); +	 OUT_BATCH( indices[i+3] | indices[i+2] << 16 ); +	 OUT_BATCH( indices[i+0] | indices[i+3] << 16 ); +      } +      break; +   default: +      assert(0); +      break; +   } +} + +static unsigned +draw_calc_nr_indices( uint nr_indices, unsigned type ) +{ +   switch (type) { +   case 0: +      return nr_indices; +   case PIPE_PRIM_LINE_LOOP: +      if (nr_indices >= 2) +	 return nr_indices * 2; +      else +	 return 0; +   case PIPE_PRIM_QUADS: +      return (nr_indices / 4) * 6; +   case PIPE_PRIM_QUAD_STRIP: +      return ((nr_indices - 2) / 2) * 6; +   default: +      assert(0); +      return 0; +   } +} + +static void  +i915_vbuf_render_draw( struct vbuf_render *render, +                       const ushort *indices, +                       uint nr_indices) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   struct i915_context *i915 = i915_render->i915; +   unsigned save_nr_indices; + +   save_nr_indices = nr_indices; + +   nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); +   if (!nr_indices) +      return; + +   if (i915->dirty) +      i915_update_derived( i915 ); + +   if (i915->hardware_dirty) +      i915_emit_hardware_state( i915 ); + +   if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { +      FLUSH_BATCH(NULL); + +      /* Make sure state is re-emitted after a flush:  +       */ +      i915_update_derived( i915 ); +      i915_emit_hardware_state( i915 ); +      i915->vbo_flushed = 1; + +      if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { +	 assert(0); +     goto out; +      } +   } + +   OUT_BATCH( _3DPRIMITIVE | +	      PRIM_INDIRECT | +	      i915_render->hwprim | +	      PRIM_INDIRECT_ELTS | +	      nr_indices ); +   draw_generate_indices( render, +			  indices, +			  save_nr_indices, +			  i915_render->fallback ); + +out: +   return; +} + + +static void +i915_vbuf_render_release_vertices( struct vbuf_render *render, +			           void *vertices,  +			           unsigned vertex_size, +			           unsigned vertices_used ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   struct i915_context *i915 = i915_render->i915; +   size_t size = (size_t)vertex_size * (size_t)vertices_used; + +   assert(i915->vbo); + +   i915_render->vbo_offset += size; +   i915->vbo = NULL; +   i915->dirty |= I915_NEW_VBO; +} + + +static void +i915_vbuf_render_destroy( struct vbuf_render 
*render ) +{ +   struct i915_vbuf_render *i915_render = i915_vbuf_render(render); +   FREE(i915_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +i915_vbuf_render_create( struct i915_context *i915 ) +{ +   struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); +   struct pipe_screen *screen = i915->pipe.screen; + +   i915_render->i915 = i915; +    +   i915_render->base.max_vertex_buffer_bytes = 128*1024; +    +   /* NOTE: it must be such that state and vertices indices fit in a single  +    * batch buffer. +    */ +   i915_render->base.max_indices = 16*1024; + +   i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; +   i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; +   i915_render->base.set_primitive = i915_vbuf_render_set_primitive; +   i915_render->base.draw = i915_vbuf_render_draw; +   i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; +   i915_render->base.release_vertices = i915_vbuf_render_release_vertices; +   i915_render->base.destroy = i915_vbuf_render_destroy; + +   i915_render->vbo_alloc_size = 128 * 4096; +   i915_render->vbo_size = i915_render->vbo_alloc_size; +   i915_render->vbo_offset = 0; +   i915_render->vbo = pipe_buffer_create(screen, +                                         64, +                                         I915_BUFFER_USAGE_LIT_VERTEX, +                                         i915_render->vbo_size); +   i915_render->vbo_ptr = pipe_buffer_map(screen, +                                          i915_render->vbo, +                                          PIPE_BUFFER_USAGE_CPU_WRITE); +   pipe_buffer_unmap(screen, i915_render->vbo); + +   return &i915_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) +{ +   struct vbuf_render *render; +   struct draw_stage *stage; +    +   render = i915_vbuf_render_create(i915); +   if(!render) +      return NULL; +    +   stage = draw_vbuf_stage( i915->draw, render ); +   if(!stage) { +      render->destroy(render); +      return NULL; +   } +   /** TODO JB: this shouldn't be here */ +   draw_set_render(i915->draw, render); + +   return stage; +} diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915simple/i915_reg.h new file mode 100644 index 0000000000..04620fec68 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_reg.h @@ -0,0 +1,978 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef I915_REG_H +#define I915_REG_H + + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE		(CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST		(0x0<<18) +#define PRIM3D_TRISTRIP 	(0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18) +#define PRIM3D_TRIFAN		(0x3<<18) +#define PRIM3D_POLY		(0x4<<18) +#define PRIM3D_LINELIST 	(0x5<<18) +#define PRIM3D_LINESTRIP	(0x6<<18) +#define PRIM3D_RECTLIST 	(0x7<<18) +#define PRIM3D_POINTLIST	(0x8<<18) +#define PRIM3D_DIB		(0x9<<18) +#define PRIM3D_CLEAR_RECT	(0xa<<18) +#define PRIM3D_ZONE_INIT	(0xd<<18) +#define PRIM3D_MASK		(0x1f<<18) + +/* p137 */ +#define _3DSTATE_AA_CMD			(CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE	(1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 	0 +#define AA_LINE_ECAAR_WIDTH_1_0		(1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 	(2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 	(3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE	(1<<8) +#define AA_LINE_REGION_WIDTH_0_5	0 +#define AA_LINE_REGION_WIDTH_1_0	(1<<6) +#define AA_LINE_REGION_WIDTH_2_0	(2<<6) +#define AA_LINE_REGION_WIDTH_4_0	(3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS    (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF          (1<<23) +#define BFO_STENCIL_REF_SHIFT           15 +#define BFO_STENCIL_REF_MASK            (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS        (1<<14) +#define BFO_STENCIL_TEST_SHIFT          11 +#define BFO_STENCIL_TEST_MASK           (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT          8 +#define BFO_STENCIL_FAIL_MASK           (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT   5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK    (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT   2 +#define BFO_STENCIL_PASS_Z_PASS_MASK    (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE     (1<<1) +#define BFO_STENCIL_TWO_SIDE            (1<<0) + + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS    (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK      (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK     (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT       8 +#define BFM_STENCIL_TEST_MASK_MASK        (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT      0 +#define BFM_STENCIL_WRITE_MASK_MASK       (0xff<<0) + + + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD	(CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK	(0x3<<24) +#define BUF_3D_ID_DEPTH 	(0x7<<24) +#define BUF_3D_USE_FENCE	(1<<23) +#define BUF_3D_TILED_SURFACE	(1<<22) +#define BUF_3D_TILE_WALK_X	0 +#define BUF_3D_TILE_WALK_Y	(1<<21) +#define BUF_3D_PITCH(x)         (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x)		((x) & ~0x3) + + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS	(CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT		(1 << 16) +#define CLEARPARAM_ZONE_INIT		(0 << 16) +#define CLEARPARAM_WRITE_COLOR		(1 << 2) +#define CLEARPARAM_WRITE_DEPTH		(1 << 1) +#define CLEARPARAM_WRITE_STENCIL	(1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define 
_3DSTATE_CONST_BLEND_COLOR_CMD	(CMD_3D | (0x1d<<24) | (0x88<<16)) + + + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS      (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit)           ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD	(CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD		(CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD		(CMD_3D | (0x1d<<24) | (0x98<<16)) + + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE       (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + + +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE    (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD	(CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL           (0<<30) +#define TEX_DEFAULT_COLOR_D3D           (1<<30) +#define ZR_EARLY_DEPTH                  (1<<29) +#define LOD_PRECLAMP_OGL                (1<<28) +#define LOD_PRECLAMP_D3D                (0<<28) +#define DITHER_FULL_ALWAYS              (0<<26) +#define DITHER_FULL_ON_FB_BLEND         (1<<26) +#define DITHER_CLAMPED_ALWAYS           (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP        (1<<25) +#define DEBUG_DISABLE_ENH_DITHER        (1<<24) +#define DSTORG_HORT_BIAS(x)		((x)<<20) +#define DSTORG_VERT_BIAS(x)		((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL	0 +#define COLOR_4_2_2_CHNL_WRT_Y		(1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR		(2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB		(3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB	(4<<12) +#define COLOR_BUF_8BIT			0 +#define COLOR_BUF_RGB555 		(1<<8) +#define COLOR_BUF_RGB565 		(2<<8) +#define COLOR_BUF_ARGB8888		(3<<8) +#define DEPTH_FRMT_16_FIXED		0 +#define DEPTH_FRMT_16_FLOAT		(1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER	(2<<2) +#define VERT_LINE_STRIDE_1		(1<<1) +#define VERT_LINE_STRIDE_0		(0<<1) +#define VERT_LINE_STRIDE_OFS_1		1 +#define VERT_LINE_STRIDE_OFS_0		0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD		(CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS 	(1<<30) +#define DRAW_DITHER_OFS_X(x)		((x)<<26) +#define DRAW_DITHER_OFS_Y(x)		((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x)			((x)<<16) +#define DRAW_XMIN(x)			(x) +/* Dword 3 */ +#define DRAW_YMAX(x)			((x)<<16) +#define DRAW_XMAX(x)			(x) +/* Dword 4 */ +#define DRAW_YORG(x)			((x)<<16) +#define DRAW_XORG(x)			(x) + + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD		(CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x)		((x)<<16) +#define FOG_COLOR_GREEN(x)		((x)<<8) +#define FOG_COLOR_BLUE(x)		(x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD		(CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE	(1<<31) +#define FMC1_FOGFUNC_VERTEX		(0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP		(1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2		(2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR	(3<<28) +#define FMC1_FOGFUNC_MASK		(3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE     (1<<27) +#define FMC1_FOGINDEX_Z		        (0<<25) +#define FMC1_FOGINDEX_W   		(1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE	(1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE	(1<<23) +#define FMC1_C1_ONE      	        (1<<13) +#define FMC1_C1_MASK		        (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE		        (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE      		(1<<16) + + + +/* 
_3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD	(CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE	        (1<<23) +#define IAB_ENABLE       	        (1<<22) +#define IAB_MODIFY_FUNC         	(1<<21) +#define IAB_FUNC_SHIFT          	16 +#define IAB_MODIFY_SRC_FACTOR   	(1<<11) +#define IAB_SRC_FACTOR_SHIFT		6 +#define IAB_SRC_FACTOR_MASK		(BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR	        (1<<5) +#define IAB_DST_FACTOR_SHIFT		0 +#define IAB_DST_FACTOR_MASK		(BLENDFACT_MASK<<0) + + +#define BLENDFUNC_ADD			0x0 +#define BLENDFUNC_SUBTRACT		0x1 +#define BLENDFUNC_REVERSE_SUBTRACT	0x2 +#define BLENDFUNC_MIN			0x3 +#define BLENDFUNC_MAX			0x4 +#define BLENDFUNC_MASK			0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT	        (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT       (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT      (0x02<<8) +#define LI0_STATE_SAMPLER               (0x04<<8) +#define LI0_STATE_MAP                   (0x08<<8) +#define LI0_STATE_PROGRAM               (0x10<<8) +#define LI0_STATE_CONSTANTS             (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x)          ((x)&~0x3) +#define SIS0_FORCE_LOAD                 (1<<1) +#define SIS0_BUFFER_VALID               (1<<0) +#define SIS1_BUFFER_LENGTH(x)           ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x)          ((x)&~0x3) +#define DIS0_BUFFER_RESET               (1<<1) +#define DIS0_BUFFER_VALID               (1<<0) + +#define SSB0_BUFFER_ADDRESS(x)          ((x)&~0x3) +#define SSB0_FORCE_LOAD                 (1<<1) +#define SSB0_BUFFER_VALID               (1<<0) +#define SSB1_BUFFER_LENGTH(x)           ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x)          ((x)&~0x3) +#define MSB0_FORCE_LOAD                 (1<<1) +#define MSB0_BUFFER_VALID               (1<<0) +#define MSB1_BUFFER_LENGTH(x)           ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x)          ((x)&~0x3) +#define PSP0_FORCE_LOAD                 (1<<1) +#define PSP0_BUFFER_VALID               (1<<0) +#define PSP1_BUFFER_LENGTH(x)           ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x)          ((x)&~0x3) +#define PSC0_FORCE_LOAD                 (1<<1) +#define PSC0_BUFFER_VALID               (1<<0) +#define PSC1_BUFFER_LENGTH(x)           ((x)&0xff) + + + + + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD	(CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE	(1<<15) +#define OGL_POINT_RASTER_RULE		(1<<13) +#define ENABLE_TEXKILL_3D_4D            (1<<10) +#define TEXKILL_3D                      (0<<9) +#define TEXKILL_4D                      (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX	(1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX	(1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x)	((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) 	((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD	(CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT		((1<<1) | 1) +#define DISABLE_SCISSOR_RECT		(1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD	(CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x)		((x)<<16) +#define SCISSOR_RECT_0_XMIN(x)		(x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x)		((x)<<16) +#define SCISSOR_RECT_0_XMAX(x)		(x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n)                      (1<<(4+n)) + +#define S0_VB_OFFSET_MASK              0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE      (1<<0) + +#define 
S1_VERTEX_WIDTH_SHIFT          24 +#define S1_VERTEX_WIDTH_MASK           (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT          16 +#define S1_VERTEX_PITCH_MASK           (0x3f<<16) + +#define TEXCOORDFMT_2D                 0x0 +#define TEXCOORDFMT_3D                 0x1 +#define TEXCOORDFMT_4D                 0x2 +#define TEXCOORDFMT_1D                 0x3 +#define TEXCOORDFMT_2D_16              0x4 +#define TEXCOORDFMT_4D_16              0x5 +#define TEXCOORDFMT_NOT_PRESENT        0xf +#define S2_TEXCOORD_FMT0_MASK            0xf +#define S2_TEXCOORD_FMT1_SHIFT           4 +#define S2_TEXCOORD_FMT(unit, type)    ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE               (~0) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT           23 +#define S4_POINT_WIDTH_MASK            (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT            19 +#define S4_LINE_WIDTH_ONE              (0x2<<19) +#define S4_LINE_WIDTH_MASK             (0xf<<19) +#define S4_FLATSHADE_ALPHA             (1<<18) +#define S4_FLATSHADE_FOG               (1<<17) +#define S4_FLATSHADE_SPECULAR          (1<<16) +#define S4_FLATSHADE_COLOR             (1<<15) +#define S4_CULLMODE_BOTH	       (0<<13) +#define S4_CULLMODE_NONE	       (1<<13) +#define S4_CULLMODE_CW		       (2<<13) +#define S4_CULLMODE_CCW		       (3<<13) +#define S4_CULLMODE_MASK	       (3<<13) +#define S4_VFMT_POINT_WIDTH            (1<<12) +#define S4_VFMT_SPEC_FOG               (1<<11) +#define S4_VFMT_COLOR                  (1<<10) +#define S4_VFMT_DEPTH_OFFSET           (1<<9) +#define S4_VFMT_XYZ     	       (1<<6) +#define S4_VFMT_XYZW     	       (2<<6) +#define S4_VFMT_XY     		       (3<<6) +#define S4_VFMT_XYW     	       (4<<6) +#define S4_VFMT_XYZW_MASK              (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE       (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR      (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3) +#define S4_VFMT_FOG_PARAM              (1<<2) +#define S4_SPRITE_POINT_ENABLE         (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE       (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | 	\ +		      S4_VFMT_SPEC_FOG      |	\ +		      S4_VFMT_COLOR         |	\ +		      S4_VFMT_DEPTH_OFFSET  |	\ +		      S4_VFMT_XYZW_MASK     |	\ +		      S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA          (1<<31) +#define S5_WRITEDISABLE_RED            (1<<30) +#define S5_WRITEDISABLE_GREEN          (1<<29) +#define S5_WRITEDISABLE_BLUE           (1<<28) +#define S5_WRITEDISABLE_MASK           (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27) +#define S5_LAST_PIXEL_ENABLE           (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25) +#define S5_FOG_ENABLE                  (1<<24) +#define S5_STENCIL_REF_SHIFT           16 +#define S5_STENCIL_REF_MASK            (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT     13 +#define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT          10 +#define S5_STENCIL_FAIL_MASK           (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT   7 +#define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT   4 +#define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE        (1<<3) +#define S5_STENCIL_TEST_ENABLE         (1<<2) +#define S5_COLOR_DITHER_ENABLE         (1<<1) +#define S5_LOGICOP_ENABLE              (1<<0) + + +#define S6_ALPHA_TEST_ENABLE           (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT       28 +#define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28) +#define S6_ALPHA_REF_SHIFT             20 +#define S6_ALPHA_REF_MASK     
         (0xff<<20) +#define S6_DEPTH_TEST_ENABLE           (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT       16 +#define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16) +#define S6_CBUF_BLEND_ENABLE           (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT       12 +#define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT   8 +#define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT   4 +#define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE          (1<<3) +#define S6_COLOR_WRITE_ENABLE          (1<<2) +#define S6_TRISTRIP_PV_SHIFT           0 +#define S6_TRISTRIP_PV_MASK            (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK     ~0 + + + +#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT) +#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT) +#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT) +#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT) + + + + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ + +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ +#define _3DSTATE_MAP_PALETTE_LOAD_32    (CMD_3D|(0x1d<<24)|(0x8f<<16)) +/* subsequent dwords up to length (max 16) are ARGB8888 color values */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD		(CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC		(1<<23) +#define LOGIC_OP_FUNC(x)		((x)<<18) +#define LOGICOP_MASK			(0xf<<18) +#define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK	(1<<17) +#define STENCIL_TEST_MASK(x)		(((x)&0xff)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK	(1<<16) +#define STENCIL_WRITE_MASK(x)		((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD		(CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE	(1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE	(1<<16) + + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS  (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n)                      (1<<(n)) +#define PS2_CONST_X(n)                  (n) +#define PS3_CONST_Y(n)                  (n) +#define PS4_CONST_Z(n)                  (n) +#define PS5_CONST_W(n)                  (n) + +/* p222 */ + + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN     32 +#define I915_MAX_ALU_INSN     64 +#define I915_MAX_DECL_INSN    27 +#define I915_MAX_TEMPORARY    16 + + +/* Each instruction is 3 dwords long, though most don't require all + * this space.  Maximum of 123 instructions.  Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM    (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R                 0    /* temporary regs, no need to +                                         * dcl, must be written before +                                         * read -- Preserved between +                                         * phases.  +                                         */ +#define REG_TYPE_T                 1    /* Interpolated values, must be +                                         * dcl'ed before use. +                                         * +                                         * 0..7: texture coord, +                                         * 8: diffuse spec, +                                         * 9: specular color, +                                         * 10: fog parameter in w. 
+                                         */ +#define REG_TYPE_CONST             2    /* Restriction: only one const +                                         * can be referenced per +                                         * instruction, though it may be +                                         * selected for multiple inputs. +                                         * Constants not initialized +                                         * default to zero. +                                         */ +#define REG_TYPE_S                 3    /* sampler */ +#define REG_TYPE_OC                4    /* output color (rgba) */ +#define REG_TYPE_OD                5    /* output depth (w), xyz are +                                         * temporaries.  If not written, +                                         * interpolated depth is used? +                                         */ +#define REG_TYPE_U                 6    /* unpreserved temporaries */ +#define REG_TYPE_MASK              0x7 +#define REG_NR_MASK                0xf + + +/* REG_TYPE_T: + */ +#define T_TEX0     0 +#define T_TEX1     1 +#define T_TEX2     2 +#define T_TEX3     3 +#define T_TEX4     4 +#define T_TEX5     5 +#define T_TEX6     6 +#define T_TEX7     7 +#define T_DIFFUSE  8 +#define T_SPECULAR 9 +#define T_FOG_W    10           /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww  + */ +#define A0_NOP    (0x0<<24)     /* no operation */ +#define A0_ADD    (0x1<<24)     /* dst = src0 + src1 */ +#define A0_MOV    (0x2<<24)     /* dst = src0 */ +#define A0_MUL    (0x3<<24)     /* dst = src0 * src1 */ +#define A0_MAD    (0x4<<24)     /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24)     /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3    (0x6<<24)     /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4    (0x7<<24)     /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC    (0x8<<24)     /* dst = src0 - floor(src0) */ +#define A0_RCP    (0x9<<24)     /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ    (0xa<<24)     /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP    (0xb<<24)     /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG    (0xc<<24)     /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP    (0xd<<24)     /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN    (0xe<<24)     /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX    (0xf<<24)     /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR    (0x10<<24)    /* dst = floor(src0) */ +#define A0_MOD    (0x11<<24)    /* dst = src0 fmod 1.0 */ +#define A0_TRC    (0x12<<24)    /* dst = int(src0) */ +#define A0_SGE    (0x13<<24)    /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT    (0x14<<24)    /* dst = src0 < src1 ? 
1.0 : 0.0 */ +#define A0_DEST_SATURATE                 (1<<22) +#define A0_DEST_TYPE_SHIFT                19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT                 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X                (1<<10) +#define A0_DEST_CHANNEL_Y                (2<<10) +#define A0_DEST_CHANNEL_Z                (4<<10) +#define A0_DEST_CHANNEL_W                (8<<10) +#define A0_DEST_CHANNEL_ALL              (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT            10 +#define A0_SRC0_TYPE_SHIFT               7 +#define A0_SRC0_NR_SHIFT                 2 + +#define A0_DEST_CHANNEL_XY              (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ             (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + + +#define SRC_X        0 +#define SRC_Y        1 +#define SRC_Z        2 +#define SRC_W        3 +#define SRC_ZERO     4 +#define SRC_ONE      5 + +#define A1_SRC0_CHANNEL_X_NEGATE         (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT          28 +#define A1_SRC0_CHANNEL_Y_NEGATE         (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT          24 +#define A1_SRC0_CHANNEL_Z_NEGATE         (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT          20 +#define A1_SRC0_CHANNEL_W_NEGATE         (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT          16 +#define A1_SRC1_TYPE_SHIFT               13 +#define A1_SRC1_NR_SHIFT                 8 +#define A1_SRC1_CHANNEL_X_NEGATE         (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT          4 +#define A1_SRC1_CHANNEL_Y_NEGATE         (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT          0 + +#define A2_SRC1_CHANNEL_Z_NEGATE         (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT          28 +#define A2_SRC1_CHANNEL_W_NEGATE         (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT          24 +#define A2_SRC2_TYPE_SHIFT               21 +#define A2_SRC2_NR_SHIFT                 16 +#define A2_SRC2_CHANNEL_X_NEGATE         (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT          12 +#define A2_SRC2_CHANNEL_Y_NEGATE         (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT          8 +#define A2_SRC2_CHANNEL_Z_NEGATE         (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT          4 +#define A2_SRC2_CHANNEL_W_NEGATE         (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT          0 + + + +/* Texture instructions */ +#define T0_TEXLD     (0x15<<24) /* Sample texture using predeclared +                                 * sampler and address, and output +                                 * filtered texel data to destination +                                 * register */ +#define T0_TEXLDP    (0x16<<24) /* Same as texld but performs a +                                 * perspective divide of the texture +                                 * coordinate .xyz values by .w before +                                 * sampling. */ +#define T0_TEXLDB    (0x17<<24) /* Same as texld but biases the +                                 * computed LOD by w.  Only S4.6 two's +                                 * comp is used.  This implies that a +                                 * float to fixed conversion is +                                 * done. */ +#define T0_TEXKILL   (0x18<<24) /* Does not perform a sampling +                                 * operation.  Simply kills the pixel +                                 * if any channel of the address +                                 * register is < 0.0. 
*/ +#define T0_DEST_TYPE_SHIFT                19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback)  + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction).  + */ +#define T0_DEST_NR_SHIFT                 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT              0      /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK               (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT        24     /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT          17 +#define T2_MBZ                           0 + +/* Declaration instructions */ +#define D0_DCL       (0x19<<24) /* Declare a t (interpolated attrib) +                                 * register or an s (sampler) +                                 * register. */ +#define D0_SAMPLE_TYPE_SHIFT              22 +#define D0_SAMPLE_TYPE_2D                 (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE               (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME             (0x2<<22) +#define D0_SAMPLE_TYPE_MASK               (0x3<<22) + +#define D0_TYPE_SHIFT                19 +/* Allow: T, S */ +#define D0_NR_SHIFT                  14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X                (1<<10) +#define D0_CHANNEL_Y                (2<<10) +#define D0_CHANNEL_Z                (4<<10) +#define D0_CHANNEL_W                (8<<10) +#define D0_CHANNEL_ALL              (0xf<<10) +#define D0_CHANNEL_NONE             (0<<10) + +#define D0_CHANNEL_XY               (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ              (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations.  
+ * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)  + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ                          0 +#define D2_MBZ                          0 + + + +/* p207 */ +#define _3DSTATE_MAP_STATE               (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT               0 +#define MS1_MAPMASK_MASK                (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE           (1<<31) +#define MS2_ADDRESS_MASK                0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE        (1<<1) +#define MS2_VERTICAL_OFFSET             (1<<1) + +#define MS3_HEIGHT_SHIFT              21 +#define MS3_WIDTH_SHIFT               10 +#define MS3_PALETTE_SELECT            (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT      7 +#define MS3_MAPSURF_FORMAT_MASK       (0x7<<7) +#define    MAPSURF_8BIT		 	   (1<<7) +#define    MAPSURF_16BIT		   (2<<7) +#define    MAPSURF_32BIT		   (3<<7) +#define    MAPSURF_422			   (5<<7) +#define    MAPSURF_COMPRESSED		   (6<<7) +#define    MAPSURF_4BIT_INDEXED		   (7<<7) +#define MS3_MT_FORMAT_MASK         (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT        3 +#define    MT_4BIT_IDX_ARGB8888	           (7<<3)       /* SURFACE_4BIT_INDEXED */ +#define    MT_8BIT_I8		           (0<<3)       /* SURFACE_8BIT */ +#define    MT_8BIT_L8		           (1<<3) +#define    MT_8BIT_A8		           (4<<3) +#define    MT_8BIT_MONO8	           (5<<3) +#define    MT_16BIT_RGB565 		   (0<<3)       /* SURFACE_16BIT */ +#define    MT_16BIT_ARGB1555		   (1<<3) +#define    MT_16BIT_ARGB4444		   (2<<3) +#define    MT_16BIT_AY88		   (3<<3) +#define    MT_16BIT_88DVDU	           (5<<3) +#define    MT_16BIT_BUMP_655LDVDU	   (6<<3) +#define    MT_16BIT_I16	                   (7<<3) +#define    MT_16BIT_L16	                   (8<<3) +#define    MT_16BIT_A16	                   (9<<3) +#define    MT_32BIT_ARGB8888		   (0<<3)       /* SURFACE_32BIT */ +#define    MT_32BIT_ABGR8888		   (1<<3) +#define    MT_32BIT_XRGB8888		   (2<<3) +#define    MT_32BIT_XBGR8888		   (3<<3) +#define    MT_32BIT_QWVU8888		   (4<<3) +#define    MT_32BIT_AXVU8888		   (5<<3) +#define    MT_32BIT_LXVU8888	           (6<<3) +#define    MT_32BIT_XLVU8888	           (7<<3) +#define    MT_32BIT_ARGB2101010	           (8<<3) +#define    MT_32BIT_ABGR2101010	           (9<<3) +#define    MT_32BIT_AWVU2101010	           (0xA<<3) +#define    MT_32BIT_GR1616	           (0xB<<3) +#define    MT_32BIT_VU1616	           (0xC<<3) +#define    MT_32BIT_xI824	           (0xD<<3) +#define    MT_32BIT_xA824	           (0xE<<3) +#define    MT_32BIT_xL824	           (0xF<<3) +#define    MT_422_YCRCB_SWAPY	           (0<<3)       /* SURFACE_422 */ +#define    MT_422_YCRCB_NORMAL	           (1<<3) +#define    MT_422_YCRCB_SWAPUV	           (2<<3) +#define    MT_422_YCRCB_SWAPUVY	           (3<<3) +#define    MT_COMPRESS_DXT1		   (0<<3)       /* SURFACE_COMPRESSED */ +#define    MT_COMPRESS_DXT2_3	           (1<<3) +#define    MT_COMPRESS_DXT4_5	           (2<<3) +#define    MT_COMPRESS_FXT1		   (3<<3) +#define    MT_COMPRESS_DXT1_RGB		   (4<<3) +#define MS3_USE_FENCE_REGS              (1<<2) +#define MS3_TILED_SURFACE             (1<<1) +#define MS3_TILE_WALK                 (1<<0) + +#define MS4_PITCH_SHIFT                 21 +#define MS4_CUBE_FACE_ENA_NEGX          (1<<20) +#define MS4_CUBE_FACE_ENA_POSX          (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY          (1<<18) +#define MS4_CUBE_FACE_ENA_POSY          (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ          (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ          (1<<15) 
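A minimal sketch of how the _3DSTATE_MAP_STATE dword-3 fields defined above might be packed for a 2D ARGB8888 texture. The helper name is hypothetical and the width-minus-one/height-minus-one encoding is an assumption made for illustration; none of this is part of the original header.

static inline unsigned
example_pack_map_state_ms3(unsigned width, unsigned height)
{
   /* Illustrative only: assumes the hardware expects dimension-minus-one
    * in the width/height fields; check the programming docs before reuse.
    */
   return ((height - 1) << MS3_HEIGHT_SHIFT) |
          ((width - 1) << MS3_WIDTH_SHIFT) |
          MAPSURF_32BIT |          /* 32-bit surface class */
          MT_32BIT_ARGB8888;       /* ARGB8888 layout within that class */
}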
+#define MS4_CUBE_FACE_ENA_MASK          (0x3f<<15) +#define MS4_MAX_LOD_SHIFT		9 +#define MS4_MAX_LOD_MASK		(0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY           (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT        (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT        (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT          0 +#define MS4_VOLUME_DEPTH_MASK           (0xff<<0) + +/* p244 */ +#define _3DSTATE_SAMPLER_STATE         (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT               0 +#define SS1_MAPMASK_MASK                (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE        (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE     (1<<30) +#define SS2_COLORSPACE_CONVERSION       (1<<29) +#define SS2_CHROMAKEY_SHIFT             27 +#define SS2_BASE_MIP_LEVEL_SHIFT        22 +#define SS2_BASE_MIP_LEVEL_MASK         (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT            20 +#define SS2_MIP_FILTER_MASK             (0x3<<20) +#define   MIPFILTER_NONE       	0 +#define   MIPFILTER_NEAREST	1 +#define   MIPFILTER_LINEAR	3 +#define SS2_MAG_FILTER_SHIFT          17 +#define SS2_MAG_FILTER_MASK           (0x7<<17) +#define   FILTER_NEAREST	0 +#define   FILTER_LINEAR		1 +#define   FILTER_ANISOTROPIC	2 +#define   FILTER_4X4_1    	3 +#define   FILTER_4X4_2    	4 +#define   FILTER_4X4_FLAT 	5 +#define   FILTER_6X5_MONO   	6       /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT          14 +#define SS2_MIN_FILTER_MASK           (0x7<<14) +#define SS2_LOD_BIAS_SHIFT            5 +#define SS2_LOD_BIAS_ONE              (0x10<<5) +#define SS2_LOD_BIAS_MASK             (0x1ff<<5) +/* Shadow requires: + *  MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + *  FILTER_4X4_x  MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE             (1<<4) +#define SS2_MAX_ANISO_MASK            (1<<3) +#define SS2_MAX_ANISO_2               (0<<3) +#define SS2_MAX_ANISO_4               (1<<3) +#define SS2_SHADOW_FUNC_SHIFT         0 +#define SS2_SHADOW_FUNC_MASK          (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT            24 +#define SS3_MIN_LOD_ONE              (0x10<<24) +#define SS3_MIN_LOD_MASK             (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE        (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT      12 +#define SS3_TCX_ADDR_MODE_MASK       (0x7<<12) +#define   TEXCOORDMODE_WRAP		0 +#define   TEXCOORDMODE_MIRROR		1 +#define   TEXCOORDMODE_CLAMP_EDGE	2 +#define   TEXCOORDMODE_CUBE       	3 +#define   TEXCOORDMODE_CLAMP_BORDER	4 +#define   TEXCOORDMODE_MIRROR_ONCE      5 +#define SS3_TCY_ADDR_MODE_SHIFT      9 +#define SS3_TCY_ADDR_MODE_MASK       (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT      6 +#define SS3_TCZ_ADDR_MODE_MASK       (0x7<<6) +#define SS3_NORMALIZED_COORDS        (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT   1 +#define SS3_TEXTUREMAP_INDEX_MASK    (0xf<<1) +#define SS3_DEINTERLACER_ENABLE      (1<<0) + +#define SS4_BORDER_COLOR_MASK        (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE               (1<<16) +#define ST1_MASK                 (0xffff) + +#define _3DSTATE_DEFAULT_Z          ((0x3<<29)|(0x1d<<24)|(0x98<<16)) +#define _3DSTATE_DEFAULT_DIFFUSE    ((0x3<<29)|(0x1d<<24)|(0x99<<16)) +#define _3DSTATE_DEFAULT_SPECULAR   ((0x3<<29)|(0x1d<<24)|(0x9a<<16)) + + +#define MI_FLUSH                   ((0<<29)|(4<<23)) +#define FLUSH_MAP_CACHE            (1<<0) +#define INHIBIT_FLUSH_RENDER_CACHE (1<<2) + + +#define CMD_3D (0x3<<29) + + +#define _3DPRIMITIVE         ((0x3<<29)|(0x1f<<24)) 
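Similarly, a hedged sketch of combining the SS2 filter fields defined above into a single sampler-state dword, mirroring what a sampler-state creation routine would do; the helper name and parameter names are illustrative and not part of the original header.

static inline unsigned
example_pack_sampler_ss2(unsigned min_filter, unsigned mag_filter,
                         unsigned mip_filter)
{
   /* min_filter/mag_filter take FILTER_* values, mip_filter takes a
    * MIPFILTER_* value; each is simply shifted into its bitfield.
    */
   return (min_filter << SS2_MIN_FILTER_SHIFT) |
          (mag_filter << SS2_MAG_FILTER_SHIFT) |
          (mip_filter << SS2_MIP_FILTER_SHIFT);
}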
+#define PRIM_INDIRECT            (1<<23) +#define PRIM_INLINE              (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS       (1<<17) + +#define PRIM3D_TRILIST		(0x0<<18) +#define PRIM3D_TRISTRIP 	(0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18) +#define PRIM3D_TRIFAN		(0x3<<18) +#define PRIM3D_POLY		(0x4<<18) +#define PRIM3D_LINELIST 	(0x5<<18) +#define PRIM3D_LINESTRIP	(0x6<<18) +#define PRIM3D_RECTLIST 	(0x7<<18) +#define PRIM3D_POINTLIST	(0x8<<18) +#define PRIM3D_DIB		(0x9<<18) +#define PRIM3D_MASK		(0x1f<<18) + +#define I915PACKCOLOR4444(r,g,b,a) \ +  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4)) + +#define I915PACKCOLOR1555(r,g,b,a) \ +  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \ +    ((a) ? 0x8000 : 0)) + +#define I915PACKCOLOR565(r,g,b) \ +  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3)) + +#define I915PACKCOLOR8888(r,g,b,a) \ +  ((a<<24) | (r<<16) | (g<<8) | b) + + + + +#define BR00_BITBLT_CLIENT   0x40000000 +#define BR00_OP_COLOR_BLT    0x10000000 +#define BR00_OP_SRC_COPY_BLT 0x10C00000 +#define BR13_SOLID_PATTERN   0x80000000 + +#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|0x4) +#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21) +#define XY_COLOR_BLT_WRITE_RGB		(1<<20) + +#define XY_SRC_COPY_BLT_CMD             ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA     (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB       (1<<20) + +#define MI_WAIT_FOR_EVENT               ((0x3<<23)) +#define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2) + +#define MI_BATCH_BUFFER                 (0x30<<23) +#define MI_BATCH_BUFFER_START           (0x31<<23) +#define MI_BATCH_BUFFER_END             (0xa<<23) + + + +#define COMPAREFUNC_ALWAYS		0 +#define COMPAREFUNC_NEVER		0x1 +#define COMPAREFUNC_LESS		0x2 +#define COMPAREFUNC_EQUAL		0x3 +#define COMPAREFUNC_LEQUAL		0x4 +#define COMPAREFUNC_GREATER		0x5 +#define COMPAREFUNC_NOTEQUAL		0x6 +#define COMPAREFUNC_GEQUAL		0x7 + +#define STENCILOP_KEEP			0 +#define STENCILOP_ZERO			0x1 +#define STENCILOP_REPLACE		0x2 +#define STENCILOP_INCRSAT		0x3 +#define STENCILOP_DECRSAT		0x4 +#define STENCILOP_INCR			0x5 +#define STENCILOP_DECR			0x6 +#define STENCILOP_INVERT		0x7 + +#define LOGICOP_CLEAR			0 +#define LOGICOP_NOR			0x1 +#define LOGICOP_AND_INV 		0x2 +#define LOGICOP_COPY_INV		0x3 +#define LOGICOP_AND_RVRSE		0x4 +#define LOGICOP_INV			0x5 +#define LOGICOP_XOR			0x6 +#define LOGICOP_NAND			0x7 +#define LOGICOP_AND			0x8 +#define LOGICOP_EQUIV			0x9 +#define LOGICOP_NOOP			0xa +#define LOGICOP_OR_INV			0xb +#define LOGICOP_COPY			0xc +#define LOGICOP_OR_RVRSE		0xd +#define LOGICOP_OR			0xe +#define LOGICOP_SET			0xf + +#define BLENDFACT_ZERO			0x01 +#define BLENDFACT_ONE			0x02 +#define BLENDFACT_SRC_COLR		0x03 +#define BLENDFACT_INV_SRC_COLR 		0x04 +#define BLENDFACT_SRC_ALPHA		0x05 +#define BLENDFACT_INV_SRC_ALPHA 	0x06 +#define BLENDFACT_DST_ALPHA		0x07 +#define BLENDFACT_INV_DST_ALPHA 	0x08 +#define BLENDFACT_DST_COLR		0x09 +#define BLENDFACT_INV_DST_COLR		0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE	0x0b +#define BLENDFACT_CONST_COLOR		0x0c +#define BLENDFACT_INV_CONST_COLOR	0x0d +#define BLENDFACT_CONST_ALPHA		0x0e +#define BLENDFACT_INV_CONST_ALPHA	0x0f +#define BLENDFACT_MASK          	0x0f + +#define PCI_CHIP_I915_G			0x2582 +#define PCI_CHIP_I915_GM		0x2592 +#define PCI_CHIP_I945_G			0x2772 +#define PCI_CHIP_I945_GM		0x27A2 +#define PCI_CHIP_I945_GME		0x27AE +#define PCI_CHIP_G33_G			0x29C2 +#define 
PCI_CHIP_Q35_G			0x29B2 +#define PCI_CHIP_Q33_G			0x29D2 + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c new file mode 100644 index 0000000000..39e48105b3 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -0,0 +1,288 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_simple_screen.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_string.h" + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_screen.h" +#include "i915_texture.h" + + +static const char * +i915_get_vendor( struct pipe_screen *pscreen ) +{ +   return "Tungsten Graphics, Inc."; +} + + +static const char * +i915_get_name( struct pipe_screen *pscreen ) +{ +   static char buffer[128]; +   const char *chipset; + +   switch (i915_screen(pscreen)->pci_id) { +   case PCI_CHIP_I915_G: +      chipset = "915G"; +      break; +   case PCI_CHIP_I915_GM: +      chipset = "915GM"; +      break; +   case PCI_CHIP_I945_G: +      chipset = "945G"; +      break; +   case PCI_CHIP_I945_GM: +      chipset = "945GM"; +      break; +   case PCI_CHIP_I945_GME: +      chipset = "945GME"; +      break; +   case PCI_CHIP_G33_G: +      chipset = "G33"; +      break; +   case PCI_CHIP_Q35_G: +      chipset = "Q35"; +      break; +   case PCI_CHIP_Q33_G: +      chipset = "Q33"; +      break; +   default: +      chipset = "unknown"; +      break; +   } + +   util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); +   return buffer; +} + + +static int +i915_get_param(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +      return 8; +   case PIPE_CAP_NPOT_TEXTURES: +      return 1; +   case PIPE_CAP_TWO_SIDED_STENCIL: +      return 1; +   case PIPE_CAP_GLSL: +      return 0; +   case PIPE_CAP_S3TC: +      return 0; +   case PIPE_CAP_ANISOTROPIC_FILTER: +      return 0; +   case PIPE_CAP_POINT_SPRITE: +      return 0; +   case PIPE_CAP_MAX_RENDER_TARGETS: +      return 1; +   case PIPE_CAP_OCCLUSION_QUERY: +      return 0; +   case PIPE_CAP_TEXTURE_SHADOW_MAP: +      return 1; 
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +      return 11; /* max 1024x1024 */ +   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +      return 8;  /* max 128x128x128 */ +   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +      return 11; /* max 1024x1024 */ +   default: +      return 0; +   } +} + + +static float +i915_get_paramf(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_LINE_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_LINE_WIDTH_AA: +      return 7.5; + +   case PIPE_CAP_MAX_POINT_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_POINT_WIDTH_AA: +      return 255.0; + +   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +      return 4.0; + +   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +      return 16.0; + +   default: +      return 0; +   } +} + + +static boolean +i915_is_format_supported( struct pipe_screen *screen, +                          enum pipe_format format,  +                          enum pipe_texture_target target, +                          unsigned tex_usage,  +                          unsigned geom_flags ) +{ +   static const enum pipe_format tex_supported[] = { +      PIPE_FORMAT_R8G8B8A8_UNORM, +      PIPE_FORMAT_A8R8G8B8_UNORM, +      PIPE_FORMAT_R5G6B5_UNORM, +      PIPE_FORMAT_L8_UNORM, +      PIPE_FORMAT_A8_UNORM, +      PIPE_FORMAT_I8_UNORM, +      PIPE_FORMAT_A8L8_UNORM, +      PIPE_FORMAT_YCBCR, +      PIPE_FORMAT_YCBCR_REV, +      PIPE_FORMAT_S8Z24_UNORM, +      PIPE_FORMAT_NONE  /* list terminator */ +   }; +   static const enum pipe_format surface_supported[] = { +      PIPE_FORMAT_A8R8G8B8_UNORM, +      PIPE_FORMAT_R5G6B5_UNORM, +      PIPE_FORMAT_S8Z24_UNORM, +      /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ +      PIPE_FORMAT_NONE  /* list terminator */ +   }; +   const enum pipe_format *list; +   uint i; + +   if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) +      list = surface_supported; +   else +      list = tex_supported; + +   for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { +      if (list[i] == format) +         return TRUE; +   } + +   return FALSE; +} + + +static void +i915_destroy_screen( struct pipe_screen *screen ) +{ +   struct pipe_winsys *winsys = screen->winsys; + +   if(winsys->destroy) +      winsys->destroy(winsys); + +   FREE(screen); +} + + +static void * +i915_surface_map( struct pipe_screen *screen, +                  struct pipe_surface *surface, +                  unsigned flags ) +{ +   struct i915_texture *tex = (struct i915_texture *)surface->texture; +   char *map = pipe_buffer_map( screen, tex->buffer, flags ); +   if (map == NULL) +      return NULL; + +   if (surface->texture && +       (flags & PIPE_BUFFER_USAGE_CPU_WRITE))  +   { +      /* Do something to notify contexts of a texture change.   
+       */ +      /* i915_screen(screen)->timestamp++; */ +   } +    +   return map + surface->offset; +} + +static void +i915_surface_unmap(struct pipe_screen *screen, +                   struct pipe_surface *surface) +{ +   struct i915_texture *tex = (struct i915_texture *)surface->texture; +   pipe_buffer_unmap( screen, tex->buffer ); +} + + + +/** + * Create a new i915_screen object + */ +struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ +   struct i915_screen *i915screen = CALLOC_STRUCT(i915_screen); + +   if (!i915screen) +      return NULL; + +   switch (pci_id) { +   case PCI_CHIP_I915_G: +   case PCI_CHIP_I915_GM: +      i915screen->is_i945 = FALSE; +      break; + +   case PCI_CHIP_I945_G: +   case PCI_CHIP_I945_GM: +   case PCI_CHIP_I945_GME: +   case PCI_CHIP_G33_G: +   case PCI_CHIP_Q33_G: +   case PCI_CHIP_Q35_G: +      i915screen->is_i945 = TRUE; +      break; + +   default: +      debug_printf("%s: unknown pci id 0x%x, cannot create screen\n",  +                   __FUNCTION__, pci_id); +      return NULL; +   } + +   i915screen->pci_id = pci_id; + +   i915screen->screen.winsys = winsys; + +   i915screen->screen.destroy = i915_destroy_screen; + +   i915screen->screen.get_name = i915_get_name; +   i915screen->screen.get_vendor = i915_get_vendor; +   i915screen->screen.get_param = i915_get_param; +   i915screen->screen.get_paramf = i915_get_paramf; +   i915screen->screen.is_format_supported = i915_is_format_supported; +   i915screen->screen.surface_map = i915_surface_map; +   i915screen->screen.surface_unmap = i915_surface_unmap; + +   i915_init_screen_texture_functions(&i915screen->screen); +   u_simple_screen_init(&i915screen->screen); + +   return &i915screen->screen; +} diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h new file mode 100644 index 0000000000..73b0ff05ce --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.h @@ -0,0 +1,69 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#ifndef I915_SCREEN_H +#define I915_SCREEN_H + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct i915_screen +{ +   struct pipe_screen screen; + +   boolean is_i945; +   uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct i915_screen * +i915_screen(struct pipe_screen *pscreen) +{ +   return (struct i915_screen *) pscreen; +} + + +extern struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus +} +#endif + +#endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c new file mode 100644 index 0000000000..273e74002a --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -0,0 +1,788 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "draw/draw_context.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_state_inlines.h" +#include "i915_fpc.h" + +/* The i915 (and related graphics cores) do not support GL_CLAMP.  The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. 
+ */ +static unsigned +translate_wrap_mode(unsigned wrap) +{ +   switch (wrap) { +   case PIPE_TEX_WRAP_REPEAT: +      return TEXCOORDMODE_WRAP; +   case PIPE_TEX_WRAP_CLAMP: +      return TEXCOORDMODE_CLAMP_EDGE;   /* not quite correct */ +   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +      return TEXCOORDMODE_CLAMP_EDGE; +   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +      return TEXCOORDMODE_CLAMP_BORDER; +//   case PIPE_TEX_WRAP_MIRRORED_REPEAT: +//      return TEXCOORDMODE_MIRROR; +   default: +      return TEXCOORDMODE_WRAP; +   } +} + +static unsigned translate_img_filter( unsigned filter ) +{ +   switch (filter) { +   case PIPE_TEX_FILTER_NEAREST: +      return FILTER_NEAREST; +   case PIPE_TEX_FILTER_LINEAR: +      return FILTER_LINEAR; +   case PIPE_TEX_FILTER_ANISO: +      return FILTER_ANISOTROPIC; +   default: +      assert(0); +      return FILTER_NEAREST; +   } +} + +static unsigned translate_mip_filter( unsigned filter ) +{ +   switch (filter) { +   case PIPE_TEX_MIPFILTER_NONE: +      return MIPFILTER_NONE; +   case PIPE_TEX_MIPFILTER_NEAREST: +      return MIPFILTER_NEAREST; +   case PIPE_TEX_MIPFILTER_LINEAR: +      return MIPFILTER_LINEAR; +   default: +      assert(0); +      return MIPFILTER_NONE; +   } +} + + +/* None of this state is actually used for anything yet. + */ +static void * +i915_create_blend_state(struct pipe_context *pipe, +                        const struct pipe_blend_state *blend) +{ +   struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); + +   { +      unsigned eqRGB  = blend->rgb_func; +      unsigned srcRGB = blend->rgb_src_factor; +      unsigned dstRGB = blend->rgb_dst_factor; + +      unsigned eqA    = blend->alpha_func; +      unsigned srcA   = blend->alpha_src_factor; +      unsigned dstA   = blend->alpha_dst_factor; + +      /* Special handling for MIN/MAX filter modes handled at +       * state_tracker level. 
+       */ + +      if (srcA != srcRGB || +	  dstA != dstRGB || +	  eqA != eqRGB) { + +	 cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | +                          IAB_MODIFY_ENABLE | +                          IAB_ENABLE | +                          IAB_MODIFY_FUNC | +                          IAB_MODIFY_SRC_FACTOR | +                          IAB_MODIFY_DST_FACTOR | +                          SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) | +                          DST_ABLND_FACT(i915_translate_blend_factor(dstA)) | +                          (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT)); +      } +      else { +	 cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | +                          IAB_MODIFY_ENABLE | +                          0); +      } +   } + +   cso_data->modes4 |= (_3DSTATE_MODES_4_CMD | +                        ENABLE_LOGIC_OP_FUNC | +                        LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func))); + +   if (blend->logicop_enable) +      cso_data->LIS5 |= S5_LOGICOP_ENABLE; + +   if (blend->dither) +      cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + +   if ((blend->colormask & PIPE_MASK_R) == 0) +      cso_data->LIS5 |= S5_WRITEDISABLE_RED; + +   if ((blend->colormask & PIPE_MASK_G) == 0) +      cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; + +   if ((blend->colormask & PIPE_MASK_B) == 0) +      cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; + +   if ((blend->colormask & PIPE_MASK_A) == 0) +      cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; + +   if (blend->blend_enable) { +      unsigned funcRGB = blend->rgb_func; +      unsigned srcRGB  = blend->rgb_src_factor; +      unsigned dstRGB  = blend->rgb_dst_factor; + +      cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | +                         SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | +                         DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) | +                         (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT)); +   } + +   return cso_data; +} + +static void i915_bind_blend_state(struct pipe_context *pipe, +                                  void *blend) +{ +   struct i915_context *i915 = i915_context(pipe); +   draw_flush(i915->draw); + +   i915->blend = (struct i915_blend_state*)blend; + +   i915->dirty |= I915_NEW_BLEND; +} + + +static void i915_delete_blend_state(struct pipe_context *pipe, void *blend) +{ +   FREE(blend); +} + +static void i915_set_blend_color( struct pipe_context *pipe, +			     const struct pipe_blend_color *blend_color ) +{ +   struct i915_context *i915 = i915_context(pipe); +   draw_flush(i915->draw); + +   i915->blend_color = *blend_color; + +   i915->dirty |= I915_NEW_BLEND; +} + +static void * +i915_create_sampler_state(struct pipe_context *pipe, +                          const struct pipe_sampler_state *sampler) +{ +   struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state ); +   const unsigned ws = sampler->wrap_s; +   const unsigned wt = sampler->wrap_t; +   const unsigned wr = sampler->wrap_r; +   unsigned minFilt, magFilt; +   unsigned mipFilt; + +   cso->templ = sampler; + +   mipFilt = translate_mip_filter(sampler->min_mip_filter); +   minFilt = translate_img_filter( sampler->min_img_filter ); +   magFilt = translate_img_filter( sampler->mag_img_filter ); +    +   if (sampler->max_anisotropy > 2.0) { +      cso->state[0] |= SS2_MAX_ANISO_4; +   } + +   { +      int b = (int) (sampler->lod_bias * 16.0); +      b = CLAMP(b, -256, 255); +      cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & 
SS2_LOD_BIAS_MASK); +   } + +   /* Shadow: +    */ +   if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)  +   { +      cso->state[0] |= (SS2_SHADOW_ENABLE | +                        i915_translate_compare_func(sampler->compare_func)); + +      minFilt = FILTER_4X4_FLAT; +      magFilt = FILTER_4X4_FLAT; +   } + +   cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | +                     (mipFilt << SS2_MIP_FILTER_SHIFT) | +                     (magFilt << SS2_MAG_FILTER_SHIFT)); + +   cso->state[1] |= +      ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | +       (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | +       (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); + +   if (sampler->normalized_coords) +      cso->state[1] |= SS3_NORMALIZED_COORDS; + +   { +      int minlod = (int) (16.0 * sampler->min_lod); +      int maxlod = (int) (16.0 * sampler->max_lod); +      minlod = CLAMP(minlod, 0, 16 * 11); +      maxlod = CLAMP(maxlod, 0, 16 * 11); + +      if (minlod > maxlod) +	 maxlod = minlod; + +      cso->minlod = minlod; +      cso->maxlod = maxlod; +   } + +   { +      ubyte r = float_to_ubyte(sampler->border_color[0]); +      ubyte g = float_to_ubyte(sampler->border_color[1]); +      ubyte b = float_to_ubyte(sampler->border_color[2]); +      ubyte a = float_to_ubyte(sampler->border_color[3]); +      cso->state[2] = I915PACKCOLOR8888(r, g, b, a); +   } +   return cso; +} + +static void i915_bind_sampler_states(struct pipe_context *pipe, +                                     unsigned num, void **sampler) +{ +   struct i915_context *i915 = i915_context(pipe); +   unsigned i; + +   assert(num <= PIPE_MAX_SAMPLERS); + +   /* Check for no-op */ +   if (num == i915->num_samplers && +       !memcmp(i915->sampler, sampler, num * sizeof(void *))) +      return; + +   draw_flush(i915->draw); + +   for (i = 0; i < num; ++i) +      i915->sampler[i] = sampler[i]; +   for (i = num; i < PIPE_MAX_SAMPLERS; ++i) +      i915->sampler[i] = NULL; + +   i915->num_samplers = num; + +   i915->dirty |= I915_NEW_SAMPLER; +} + +static void i915_delete_sampler_state(struct pipe_context *pipe, +                                      void *sampler) +{ +   FREE(sampler); +} + + +/** XXX move someday?  Or consolidate all these simple state setters + * into one file. 
+ */ + +static void * +i915_create_depth_stencil_state(struct pipe_context *pipe, +				const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ +   struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); + +   { +      int testmask = depth_stencil->stencil[0].valuemask & 0xff; +      int writemask = depth_stencil->stencil[0].writemask & 0xff; + +      cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | +                              ENABLE_STENCIL_TEST_MASK | +                              STENCIL_TEST_MASK(testmask) | +                              ENABLE_STENCIL_WRITE_MASK | +                              STENCIL_WRITE_MASK(writemask)); +   } + +   if (depth_stencil->stencil[0].enabled) { +      int test = i915_translate_compare_func(depth_stencil->stencil[0].func); +      int fop  = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op); +      int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op); +      int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op); +      int ref  = depth_stencil->stencil[0].ref_value & 0xff; + +      cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE | +                            S5_STENCIL_WRITE_ENABLE | +                            (ref  << S5_STENCIL_REF_SHIFT) | +                            (test << S5_STENCIL_TEST_FUNC_SHIFT) | +                            (fop  << S5_STENCIL_FAIL_SHIFT) | +                            (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | +                            (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); +   } + +   if (depth_stencil->stencil[1].enabled) { +      int test  = i915_translate_compare_func(depth_stencil->stencil[1].func); +      int fop   = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op); +      int dfop  = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); +      int dpop  = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); +      int ref   = depth_stencil->stencil[1].ref_value & 0xff; +      int tmask = depth_stencil->stencil[1].valuemask & 0xff; +      int wmask = depth_stencil->stencil[1].writemask & 0xff; + +      cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | +                     BFO_ENABLE_STENCIL_FUNCS | +                     BFO_ENABLE_STENCIL_TWO_SIDE | +                     BFO_ENABLE_STENCIL_REF | +                     BFO_STENCIL_TWO_SIDE | +                     (ref  << BFO_STENCIL_REF_SHIFT) | +                     (test << BFO_STENCIL_TEST_SHIFT) | +                     (fop  << BFO_STENCIL_FAIL_SHIFT) | +                     (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) | +                     (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT)); + +      cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS | +                     BFM_ENABLE_STENCIL_TEST_MASK | +                     BFM_ENABLE_STENCIL_WRITE_MASK | +                     (tmask << BFM_STENCIL_TEST_MASK_SHIFT) | +                     (wmask << BFM_STENCIL_WRITE_MASK_SHIFT)); +   } +   else { +      /* This actually disables two-side stencil: The bit set is a +       * modify-enable bit to indicate we are changing the two-side +       * setting.  Then there is a symbolic zero to show that we are +       * setting the flag to zero/off. 
+       */ +      cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | +                     BFO_ENABLE_STENCIL_TWO_SIDE | +                     0); +      cso->bfo[1] = 0; +   } + +   if (depth_stencil->depth.enabled) { +      int func = i915_translate_compare_func(depth_stencil->depth.func); + +      cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE | +                          (func << S6_DEPTH_TEST_FUNC_SHIFT)); + +      if (depth_stencil->depth.writemask) +	 cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE; +   } + +   if (depth_stencil->alpha.enabled) { +      int test = i915_translate_compare_func(depth_stencil->alpha.func); +      ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref_value); + +      cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | +			  (test << S6_ALPHA_TEST_FUNC_SHIFT) | +			  (((unsigned) refByte) << S6_ALPHA_REF_SHIFT)); +   } + +   return cso; +} + +static void i915_bind_depth_stencil_state(struct pipe_context *pipe, +                                          void *depth_stencil) +{ +   struct i915_context *i915 = i915_context(pipe); +   draw_flush(i915->draw); + +   i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; + +   i915->dirty |= I915_NEW_DEPTH_STENCIL; +} + +static void i915_delete_depth_stencil_state(struct pipe_context *pipe, +                                            void *depth_stencil) +{ +   FREE(depth_stencil); +} + + +static void i915_set_scissor_state( struct pipe_context *pipe, +                                 const struct pipe_scissor_state *scissor ) +{ +   struct i915_context *i915 = i915_context(pipe); +   draw_flush(i915->draw); + +   memcpy( &i915->scissor, scissor, sizeof(*scissor) ); +   i915->dirty |= I915_NEW_SCISSOR; +} + + +static void i915_set_polygon_stipple( struct pipe_context *pipe, +                                   const struct pipe_poly_stipple *stipple ) +{ +} + + + +static void * +i915_create_fs_state(struct pipe_context *pipe, +                     const struct pipe_shader_state *templ) +{ +   struct i915_context *i915 = i915_context(pipe); +   struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); +   if (!ifs) +      return NULL; + +   ifs->state.tokens = tgsi_dup_tokens(templ->tokens); + +   tgsi_scan_shader(templ->tokens, &ifs->info); + +   /* The shader's compiled to i915 instructions here */ +   i915_translate_fragment_program(i915, ifs); + +   return ifs; +} + +static void +i915_bind_fs_state(struct pipe_context *pipe, void *shader) +{ +   struct i915_context *i915 = i915_context(pipe); +   draw_flush(i915->draw); + +   i915->fs = (struct i915_fragment_shader*) shader; + +   i915->dirty |= I915_NEW_FS; +} + +static +void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +{ +   struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; + +   if (ifs->program) +      FREE(ifs->program); +   ifs->program_len = 0; + +   FREE((struct tgsi_token *)ifs->state.tokens); + +   FREE(ifs); +} + + +static void * +i915_create_vs_state(struct pipe_context *pipe, +                     const struct pipe_shader_state *templ) +{ +   struct i915_context *i915 = i915_context(pipe); + +   /* just pass-through to draw module */ +   return draw_create_vertex_shader(i915->draw, templ); +} + +static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) +{ +   struct i915_context *i915 = i915_context(pipe); + +   /* just pass-through to draw module */ +   draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); + +   i915->dirty |= I915_NEW_VS; +} + +static 
void i915_delete_vs_state(struct pipe_context *pipe, void *shader) +{ +   struct i915_context *i915 = i915_context(pipe); + +   /* just pass-through to draw module */ +   draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_set_constant_buffer(struct pipe_context *pipe, +                                     uint shader, uint index, +                                     const struct pipe_constant_buffer *buf) +{ +   struct i915_context *i915 = i915_context(pipe); +   struct pipe_winsys *ws = pipe->winsys; +   draw_flush(i915->draw); + +   assert(shader < PIPE_SHADER_TYPES); +   assert(index == 0); + +   /* Make a copy of shader constants. +    * During fragment program translation we may add additional +    * constants to the array. +    * +    * We want to consider the situation where some user constants +    * (ex: a material color) may change frequently but the shader program +    * stays the same.  In that case we should only be updating the first +    * N constants, leaving any extras from shader translation alone. +    */ +   if (buf) { +      void *mapped; +      if (buf->buffer && buf->buffer->size && +          (mapped = ws->buffer_map(ws, buf->buffer, +                                    PIPE_BUFFER_USAGE_CPU_READ))) { +         memcpy(i915->current.constants[shader], mapped, buf->buffer->size); +         ws->buffer_unmap(ws, buf->buffer); +         i915->current.num_user_constants[shader] +            = buf->buffer->size / (4 * sizeof(float)); +      } +      else { +         i915->current.num_user_constants[shader] = 0; +      } +   } + +   i915->dirty |= I915_NEW_CONSTANTS; +} + + +static void i915_set_sampler_textures(struct pipe_context *pipe, +                                      unsigned num, +                                      struct pipe_texture **texture) +{ +   struct i915_context *i915 = i915_context(pipe); +   uint i; + +   assert(num <= PIPE_MAX_SAMPLERS); + +   /* Check for no-op */ +   if (num == i915->num_textures && +       !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) +      return; + +   /* Fixes wrong texture in texobj with VBUF */ +   draw_flush(i915->draw); + +   for (i = 0; i < num; i++) +      pipe_texture_reference((struct pipe_texture **) &i915->texture[i], +                             texture[i]); + +   for (i = num; i < i915->num_textures; i++) +      pipe_texture_reference((struct pipe_texture **) &i915->texture[i], +                             NULL); + +   i915->num_textures = num; + +   i915->dirty |= I915_NEW_TEXTURE; +} + + + +static void i915_set_framebuffer_state(struct pipe_context *pipe, +				       const struct pipe_framebuffer_state *fb) +{ +   struct i915_context *i915 = i915_context(pipe); +   draw_flush(i915->draw); + +   i915->framebuffer = *fb; /* struct copy */ + +   i915->dirty |= I915_NEW_FRAMEBUFFER; +} + + + +static void i915_set_clip_state( struct pipe_context *pipe, +			     const struct pipe_clip_state *clip ) +{ +   struct i915_context *i915 = i915_context(pipe); +   draw_flush(i915->draw); + +   draw_set_clip_state(i915->draw, clip); + +   i915->dirty |= I915_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void i915_set_viewport_state( struct pipe_context *pipe, +				     const struct pipe_viewport_state *viewport ) +{ +   struct i915_context *i915 = i915_context(pipe); + +   i915->viewport = *viewport; /* struct copy */ + +   /* pass the viewport info to the draw module */ +   
draw_set_viewport_state(i915->draw, &i915->viewport); + +   i915->dirty |= I915_NEW_VIEWPORT; +} + + +static void * +i915_create_rasterizer_state(struct pipe_context *pipe, +                             const struct pipe_rasterizer_state *rasterizer) +{ +   struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); + +   cso->templ = rasterizer; +   cso->color_interp = rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; +   cso->light_twoside = rasterizer->light_twoside; +   cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; +   cso->ds[1].f = rasterizer->offset_scale; +   if (rasterizer->poly_stipple_enable) { +      cso->st |= ST1_ENABLE; +   } + +   if (rasterizer->scissor) +      cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT; +   else +      cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT; + +   switch (rasterizer->cull_mode) { +   case PIPE_WINDING_NONE: +      cso->LIS4 |= S4_CULLMODE_NONE; +      break; +   case PIPE_WINDING_CW: +      cso->LIS4 |= S4_CULLMODE_CW; +      break; +   case PIPE_WINDING_CCW: +      cso->LIS4 |= S4_CULLMODE_CCW; +      break; +   case PIPE_WINDING_BOTH: +      cso->LIS4 |= S4_CULLMODE_BOTH; +      break; +   } + +   { +      int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf); + +      cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT; + +      if (rasterizer->line_smooth) +	 cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE; +   } + +   { +      int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff); + +      cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT; +   } + +   if (rasterizer->flatshade) { +      cso->LIS4 |= (S4_FLATSHADE_ALPHA | +                    S4_FLATSHADE_COLOR | +                    S4_FLATSHADE_SPECULAR); +   } + +   cso->LIS7 = fui( rasterizer->offset_units ); + + +   return cso; +} + +static void i915_bind_rasterizer_state( struct pipe_context *pipe, +                                        void *raster ) +{ +   struct i915_context *i915 = i915_context(pipe); + +   i915->rasterizer = (struct i915_rasterizer_state *)raster; + +   /* pass-through to draw module */ +   draw_set_rasterizer_state(i915->draw, +                          (i915->rasterizer ? i915->rasterizer->templ : NULL)); + +   i915->dirty |= I915_NEW_RASTERIZER; +} + +static void i915_delete_rasterizer_state(struct pipe_context *pipe, +                                         void *raster) +{ +   FREE(raster); +} + +static void i915_set_vertex_buffers(struct pipe_context *pipe, +                                    unsigned count, +                                    const struct pipe_vertex_buffer *buffers) +{ +   struct i915_context *i915 = i915_context(pipe); +   /* Because we change state before the draw_set_vertex_buffers call +    * we need a flush here, just to be sure. +    */ +   draw_flush(i915->draw); + +   memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); +   i915->num_vertex_buffers = count; + +   /* pass-through to draw module */ +   draw_set_vertex_buffers(i915->draw, count, buffers); +} + +static void i915_set_vertex_elements(struct pipe_context *pipe, +                                     unsigned count, +                                     const struct pipe_vertex_element *elements) +{ +   struct i915_context *i915 = i915_context(pipe); +   /* Because we change state before the draw_set_vertex_buffers call +    * we need a flush here, just to be sure. 
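+    * (Same reasoning as in i915_set_vertex_buffers above: any primitives the
+    * draw module has queued must be rendered with the old vertex layout first.)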
+    */ +   draw_flush(i915->draw); + +   i915->num_vertex_elements = count; +   /* pass-through to draw module */ +   draw_set_vertex_elements(i915->draw, count, elements); +} + + +static void i915_set_edgeflags(struct pipe_context *pipe, +                               const unsigned *bitfield) +{ +   /* TODO do something here */ +} + +void +i915_init_state_functions( struct i915_context *i915 ) +{ +   i915->pipe.set_edgeflags = i915_set_edgeflags; +   i915->pipe.create_blend_state = i915_create_blend_state; +   i915->pipe.bind_blend_state = i915_bind_blend_state; +   i915->pipe.delete_blend_state = i915_delete_blend_state; + +   i915->pipe.create_sampler_state = i915_create_sampler_state; +   i915->pipe.bind_sampler_states = i915_bind_sampler_states; +   i915->pipe.delete_sampler_state = i915_delete_sampler_state; + +   i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; +   i915->pipe.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state; +   i915->pipe.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state; + +   i915->pipe.create_rasterizer_state = i915_create_rasterizer_state; +   i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state; +   i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state; +   i915->pipe.create_fs_state = i915_create_fs_state; +   i915->pipe.bind_fs_state = i915_bind_fs_state; +   i915->pipe.delete_fs_state = i915_delete_fs_state; +   i915->pipe.create_vs_state = i915_create_vs_state; +   i915->pipe.bind_vs_state = i915_bind_vs_state; +   i915->pipe.delete_vs_state = i915_delete_vs_state; + +   i915->pipe.set_blend_color = i915_set_blend_color; +   i915->pipe.set_clip_state = i915_set_clip_state; +   i915->pipe.set_constant_buffer = i915_set_constant_buffer; +   i915->pipe.set_framebuffer_state = i915_set_framebuffer_state; + +   i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; +   i915->pipe.set_scissor_state = i915_set_scissor_state; +   i915->pipe.set_sampler_textures = i915_set_sampler_textures; +   i915->pipe.set_viewport_state = i915_set_viewport_state; +   i915->pipe.set_vertex_buffers = i915_set_vertex_buffers; +   i915->pipe.set_vertex_elements = i915_set_vertex_elements; +} diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915simple/i915_state.h new file mode 100644 index 0000000000..86c6b0027d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.h @@ -0,0 +1,50 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_STATE_H +#define I915_STATE_H + +struct i915_context; + + +struct i915_tracked_state { +   unsigned dirty; +   void (*update)( struct i915_context * ); +}; + +void i915_update_immediate( struct i915_context *i915 ); +void i915_update_dynamic( struct i915_context *i915 ); +void i915_update_derived( struct i915_context *i915 ); +void i915_update_samplers( struct i915_context *i915 ); +void i915_update_textures(struct i915_context *i915); + +void i915_emit_hardware_state( struct i915_context *i915 ); + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c new file mode 100644 index 0000000000..178d4e8781 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -0,0 +1,183 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "i915_fpc.h" + + + +/** + * Determine the hardware vertex layout. + * Depends on vertex/fragment shader state. + */ +static void calculate_vertex_layout( struct i915_context *i915 ) +{ +   const struct i915_fragment_shader *fs = i915->fs; +   const enum interp_mode colorInterp = i915->rasterizer->color_interp; +   struct vertex_info vinfo; +   boolean texCoords[8], colors[2], fog, needW; +   uint i; +   int src; + +   memset(texCoords, 0, sizeof(texCoords)); +   colors[0] = colors[1] = fog = needW = FALSE; +   memset(&vinfo, 0, sizeof(vinfo)); + +   /* Determine which fragment program inputs are needed.  Setup HW vertex +    * layout below, in the HW-specific attribute order. 
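+    * (The order used below is: position, point size [still a TODO], primary and
+    * secondary color, fog, then texcoords.)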
+    */ +   for (i = 0; i < fs->info.num_inputs; i++) { +      switch (fs->info.input_semantic_name[i]) { +      case TGSI_SEMANTIC_POSITION: +         break; +      case TGSI_SEMANTIC_COLOR: +         assert(fs->info.input_semantic_index[i] < 2); +         colors[fs->info.input_semantic_index[i]] = TRUE; +         break; +      case TGSI_SEMANTIC_GENERIC: +         /* usually a texcoord */ +         { +            const uint unit = fs->info.input_semantic_index[i]; +            assert(unit < 8); +            texCoords[unit] = TRUE; +            needW = TRUE; +         } +         break; +      case TGSI_SEMANTIC_FOG: +         fog = TRUE; +         break; +      default: +         assert(0); +      } +   } + +    +   /* pos */ +   src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); +   if (needW) { +      draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); +      vinfo.hwfmt[0] |= S4_VFMT_XYZW; +      vinfo.attrib[0].emit = EMIT_4F; +   } +   else { +      draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); +      vinfo.hwfmt[0] |= S4_VFMT_XYZ; +      vinfo.attrib[0].emit = EMIT_3F; +   } + +   /* hardware point size */ +   /* XXX todo */ + +   /* primary color */ +   if (colors[0]) { +      src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); +      draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); +      vinfo.hwfmt[0] |= S4_VFMT_COLOR; +   } + +   /* secondary color */ +   if (colors[1]) { +      src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); +      draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); +      vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; +   } + +   /* fog coord, not fog blend factor */ +   if (fog) { +      src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); +      draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); +      vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; +   } + +   /* texcoords */ +   for (i = 0; i < 8; i++) { +      uint hwtc; +      if (texCoords[i]) { +         hwtc = TEXCOORDFMT_4D; +         src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); +         draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +      } +      else { +         hwtc = TEXCOORDFMT_NOT_PRESENT; +      } +      vinfo.hwfmt[1] |= hwtc << (i * 4); +   } + +   draw_compute_vertex_size(&vinfo); + +   if (memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { +      /* Need to set this flag so that the LIS2/4 registers get set. +       * It also means the i915_update_immediate() function must be called +       * after this one, in i915_update_derived(). +       */ +      i915->dirty |= I915_NEW_VERTEX_FORMAT; + +      memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo)); +   } +} + + + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void i915_update_derived( struct i915_context *i915 ) +{ +   if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) +      calculate_vertex_layout( i915 ); + +   if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) +      i915_update_samplers(i915); + +   if (i915->dirty & I915_NEW_TEXTURE) +      i915_update_textures(i915); + +   if (i915->dirty) +      i915_update_immediate( i915 ); + +   if (i915->dirty) +      i915_update_dynamic( i915 ); + +   if (i915->dirty & I915_NEW_FS) { +      i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? 
*/ +   } + +   /* HW emit currently references framebuffer state directly: +    */ +   if (i915->dirty & I915_NEW_FRAMEBUFFER) +      i915->hardware_dirty |= I915_HW_STATIC; + +   i915->dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c new file mode 100644 index 0000000000..86126a5a15 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -0,0 +1,310 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "i915_batch.h" +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" + +#define FILE_DEBUG_FLAG DEBUG_STATE + +/* State that we have chosen to store in the DYNAMIC segment of the + * i915 indirect state mechanism.   + * + * Can't cache these in the way we do the static state, as there is no + * start/size in the command packet, instead an 'end' value that gets + * incremented. + * + * Additionally, there seems to be a requirement to re-issue the full + * (active) state every time a 4kb boundary is crossed. 
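+ * For now the dwords are simply cached in current.dynamic[] by
+ * set_dynamic_indirect() and re-emitted with the rest of the batch in
+ * i915_emit_hardware_state().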
+ */ + +static INLINE void set_dynamic_indirect( struct i915_context *i915, +					 unsigned offset, +					 const unsigned *src, +					 unsigned dwords ) +{ +   unsigned i; + +   for (i = 0; i < dwords; i++) +      i915->current.dynamic[offset + i] = src[i]; + +   i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + +/*********************************************************************** + * Modes4: stencil masks and logicop  + */ +static void upload_MODES4( struct i915_context *i915 ) +{ +   unsigned modes4 = 0; + +   /* I915_NEW_STENCIL */ +   modes4 |= i915->depth_stencil->stencil_modes4; +   /* I915_NEW_BLEND */ +   modes4 |= i915->blend->modes4; + +   /* Always, so that we know when state is in-active:  +    */ +   set_dynamic_indirect( i915,  +			 I915_DYNAMIC_MODES4, +			 &modes4, +			 1 ); +} + +const struct i915_tracked_state i915_upload_MODES4 = { +   I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, +   upload_MODES4 +}; + + + + +/*********************************************************************** + */ + +static void upload_BFO( struct i915_context *i915 ) +{ +   set_dynamic_indirect( i915, +			 I915_DYNAMIC_BFO_0, +			 &(i915->depth_stencil->bfo[0]), +			 2 ); +} + +const struct i915_tracked_state i915_upload_BFO = { +   I915_NEW_DEPTH_STENCIL, +   upload_BFO +}; + + +/*********************************************************************** + */ + + +static void upload_BLENDCOLOR( struct i915_context *i915 ) +{ +   unsigned bc[2]; + +   memset( bc, 0, sizeof(bc) ); + +   /* I915_NEW_BLEND {_COLOR}  +    */ +   { +      const float *color = i915->blend_color.color; + +      bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; +      bc[1] = pack_ui32_float4( color[0], +				color[1], +				color[2],  +				color[3] ); +   } + +   set_dynamic_indirect( i915,  +			 I915_DYNAMIC_BC_0, +			 bc, +			 2 ); +} + +const struct i915_tracked_state i915_upload_BLENDCOLOR = { +   I915_NEW_BLEND, +   upload_BLENDCOLOR +}; + +/*********************************************************************** + */ + + +static void upload_IAB( struct i915_context *i915 ) +{ +   unsigned iab = i915->blend->iab; + + +   set_dynamic_indirect( i915, +			 I915_DYNAMIC_IAB, +			 &iab, +			 1 ); +} + +const struct i915_tracked_state i915_upload_IAB = { +   I915_NEW_BLEND, +   upload_IAB +}; + + +/*********************************************************************** + */ + + + +static void upload_DEPTHSCALE( struct i915_context *i915 ) +{ +   set_dynamic_indirect( i915, +			 I915_DYNAMIC_DEPTHSCALE_0, +			 &(i915->rasterizer->ds[0].u), +			 2 ); +} + +const struct i915_tracked_state i915_upload_DEPTHSCALE = { +   I915_NEW_RASTERIZER, +   upload_DEPTHSCALE +}; + + + +/*********************************************************************** + * Polygon stipple + * + * The i915 supports a 4x4 stipple natively, GL wants 32x32. + * Fortunately stipple is usually a repeating pattern. + * + * XXX: does stipple pattern need to be adjusted according to + * the window position? + * + * XXX: possibly need workaround for conform paths test.  
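+ * upload_STIPPLE() below therefore only packs a 4x4 sub-block of the 32x32
+ * pattern into the ST1 dword.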
+ */ + +static void upload_STIPPLE( struct i915_context *i915 ) +{ +   unsigned st[2]; + +   st[0] = _3DSTATE_STIPPLE; +   st[1] = 0; + +   /* I915_NEW_RASTERIZER +    */ +   st[1] |= i915->rasterizer->st; + + +   /* I915_NEW_STIPPLE +    */ +   { +      const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple; +      ubyte p[4]; + +      p[0] = mask[12] & 0xf; +      p[1] = mask[8] & 0xf; +      p[2] = mask[4] & 0xf; +      p[3] = mask[0] & 0xf; + +      /* Not sure what to do about fallbacks, so for now just dont: +       */ +      st[1] |= ((p[0] << 0) | +		(p[1] << 4) | +		(p[2] << 8) |  +		(p[3] << 12)); +   } + + +   set_dynamic_indirect( i915,  +			 I915_DYNAMIC_STP_0, +			 &st[0], +			 2 ); +} + + +const struct i915_tracked_state i915_upload_STIPPLE = { +   I915_NEW_RASTERIZER | I915_NEW_STIPPLE, +   upload_STIPPLE +}; + + + +/*********************************************************************** + * Scissor. + */ +static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) +{ +   set_dynamic_indirect( i915, +			 I915_DYNAMIC_SC_ENA_0, +			 &(i915->rasterizer->sc[0]), +			 1 ); +} + +const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { +   I915_NEW_RASTERIZER, +   upload_SCISSOR_ENABLE +}; + + + +static void upload_SCISSOR_RECT( struct i915_context *i915 ) +{ +   unsigned x1 = i915->scissor.minx; +   unsigned y1 = i915->scissor.miny; +   unsigned x2 = i915->scissor.maxx; +   unsigned y2 = i915->scissor.maxy; +   unsigned sc[3]; +  +   sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; +   sc[1] = (y1 << 16) | (x1 & 0xffff); +   sc[2] = (y2 << 16) | (x2 & 0xffff); + +   set_dynamic_indirect( i915,  +			 I915_DYNAMIC_SC_RECT_0, +			 &sc[0], +			 3 ); +} + + +const struct i915_tracked_state i915_upload_SCISSOR_RECT = { +   I915_NEW_SCISSOR, +   upload_SCISSOR_RECT +}; + + + + + + +static const struct i915_tracked_state *atoms[] = { +   &i915_upload_MODES4, +   &i915_upload_BFO, +   &i915_upload_BLENDCOLOR, +   &i915_upload_IAB, +   &i915_upload_DEPTHSCALE, +   &i915_upload_STIPPLE, +   &i915_upload_SCISSOR_ENABLE, +   &i915_upload_SCISSOR_RECT +}; + +/* These will be dynamic indirect state commands, but for now just end + * up on the batch buffer with everything else. + */ +void i915_update_dynamic( struct i915_context *i915 ) +{ +   int i; + +   for (i = 0; i < Elements(atoms); i++) +      if (i915->dirty & atoms[i]->dirty) +	 atoms[i]->update( i915 ); +} + diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c new file mode 100644 index 0000000000..6558cf1c3e --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -0,0 +1,410 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +static unsigned translate_format( enum pipe_format format ) +{ +   switch (format) { +   case PIPE_FORMAT_A8R8G8B8_UNORM: +      return COLOR_BUF_ARGB8888; +   case PIPE_FORMAT_R5G6B5_UNORM: +      return COLOR_BUF_RGB565; +   default: +      assert(0); +      return 0; +   } +} + +static unsigned translate_depth_format( enum pipe_format zformat ) +{ +   switch (zformat) { +   case PIPE_FORMAT_S8Z24_UNORM: +      return DEPTH_FRMT_24_FIXED_8_OTHER; +   case PIPE_FORMAT_Z16_UNORM: +      return DEPTH_FRMT_16_FIXED; +   default: +      assert(0); +      return 0; +   } +} + + +/** + * Examine framebuffer state to determine width, height. + */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, +                 uint *width, uint *height) +{ +   if (fb->cbufs[0]) { +      *width = fb->cbufs[0]->width; +      *height = fb->cbufs[0]->height; +      return TRUE; +   } +   else if (fb->zsbuf) { +      *width = fb->zsbuf->width; +      *height = fb->zsbuf->height; +      return TRUE; +   } +   else { +      *width = *height = 0; +      return FALSE; +   } +} + + +/* Push the state into the sarea and/or texture memory. 
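+ * Each dirty-flag block below notes its worst-case dword/reloc count; those
+ * are the figures the BEGIN_BATCH() estimate at the top of the function adds up.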
+ */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ +   /* XXX: there must be an easier way */ +   const unsigned dwords = ( 14 +  +                             7 +  +                             I915_MAX_DYNAMIC +  +                             8 +  +                             2 + I915_TEX_UNITS*3 +  +                             2 + I915_TEX_UNITS*3 + +                             2 + I915_MAX_CONSTANT*4 +  +#if 0 +                             i915->current.program_len +  +#else +                             i915->fs->program_len +  +#endif +                             6  +                           ) * 3/2; /* plus 50% margin */ +   const unsigned relocs = ( I915_TEX_UNITS + +	                     3 +                           ) * 3/2; /* plus 50% margin */ + +#if 0 +   debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); +#endif +    +   if(!BEGIN_BATCH(dwords, relocs)) { +      FLUSH_BATCH(NULL); +      assert(BEGIN_BATCH(dwords, relocs)); +   } + +   /* 14 dwords, 0 relocs */ +   if (i915->hardware_dirty & I915_HW_INVARIENT) +   { +      OUT_BATCH(_3DSTATE_AA_CMD | +		AA_LINE_ECAAR_WIDTH_ENABLE | +		AA_LINE_ECAAR_WIDTH_1_0 | +		AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + +      OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); +      OUT_BATCH(0); + +      OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); +      OUT_BATCH(0); +       +      OUT_BATCH(_3DSTATE_DFLT_Z_CMD); +      OUT_BATCH(0); + +      OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | +		CSB_TCB(0, 0) | +		CSB_TCB(1, 1) | +		CSB_TCB(2, 2) | +		CSB_TCB(3, 3) | +		CSB_TCB(4, 4) |  +		CSB_TCB(5, 5) |  +		CSB_TCB(6, 6) |  +		CSB_TCB(7, 7)); + +      OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | +		ENABLE_POINT_RASTER_RULE | +		OGL_POINT_RASTER_RULE | +		ENABLE_LINE_STRIP_PROVOKE_VRTX | +		ENABLE_TRI_FAN_PROVOKE_VRTX | +		LINE_STRIP_PROVOKE_VRTX(1) | +		TRI_FAN_PROVOKE_VRTX(2) |  +		ENABLE_TEXKILL_3D_4D |  +		TEXKILL_4D); + +      /* Need to initialize this to zero. 
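+       * (S3 is not among the registers reloaded by the immediate-state block
+       * further down, so give it a defined value here.)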
+       */ +      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); +      OUT_BATCH(0); + +      OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + +      /* disable indirect state for now +       */ +      OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); +      OUT_BATCH(0); +   } +    +   /* 7 dwords, 1 relocs */ +   if (i915->hardware_dirty & I915_HW_IMMEDIATE) +   { +      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |  +		I1_LOAD_S(0) | +		I1_LOAD_S(1) | +		I1_LOAD_S(2) | +		I1_LOAD_S(4) | +		I1_LOAD_S(5) | +		I1_LOAD_S(6) |  +		(5)); +       +      if(i915->vbo) +         OUT_RELOC(i915->vbo, +                   I915_BUFFER_ACCESS_READ, +                   i915->current.immediate[I915_IMMEDIATE_S0]); +      else +	 /* FIXME: we should not do this */ +	 OUT_BATCH(0); +      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); +      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); +      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); +      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); +      OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); +   }  +    +   /* I915_MAX_DYNAMIC dwords, 0 relocs */ +   if (i915->hardware_dirty & I915_HW_DYNAMIC)  +   { +      int i; +      for (i = 0; i < I915_MAX_DYNAMIC; i++) { +	 OUT_BATCH(i915->current.dynamic[i]); +      } +   } +    +   /* 8 dwords, 2 relocs */ +   if (i915->hardware_dirty & I915_HW_STATIC) +   { +      struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; +      struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + +      if (cbuf_surface) { +	 unsigned cpitch = cbuf_surface->stride; +	 unsigned ctile = BUF_3D_USE_FENCE; +         struct i915_texture *tex = (struct i915_texture *) +                                    cbuf_surface->texture; +         struct pipe_buffer *buffer = tex->buffer; +         assert(tex); + +	 if (tex && tex->tiled) { +	    ctile = BUF_3D_TILED_SURFACE; +	 } + +	 OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + +	 OUT_BATCH(BUF_3D_ID_COLOR_BACK | +		   BUF_3D_PITCH(cpitch) |  /* pitch in bytes */ +		   ctile); + +	 OUT_RELOC(tex->buffer, +		   I915_BUFFER_ACCESS_WRITE, +		   cbuf_surface->offset); +      } + +      /* What happens if no zbuf?? 
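+       * For now the depth BUF_INFO packet is simply skipped and zformat
+       * stays zero in DST_BUF_VARS below.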
+       */ +      if (depth_surface) { +	 unsigned zpitch = depth_surface->stride; +	 unsigned ztile = BUF_3D_USE_FENCE; +         struct i915_texture *tex = (struct i915_texture *) +                                    depth_surface->texture; +         struct pipe_buffer *buffer = tex->buffer; +         assert(tex); + +	 if (tex && tex->tiled) { +	    ztile = BUF_3D_TILED_SURFACE; +	 } + +	 OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + +	 OUT_BATCH(BUF_3D_ID_DEPTH | +		   BUF_3D_PITCH(zpitch) |  /* pitch in bytes */ +		   ztile); + +	 OUT_RELOC(tex->buffer, +		   I915_BUFFER_ACCESS_WRITE, +		   depth_surface->offset); +      } +    +      { +	 unsigned cformat, zformat = 0; +       +	 if (cbuf_surface) +            cformat = cbuf_surface->format; +         else +            cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ +         cformat = translate_format(cformat); + +	 if (depth_surface)  +	    zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); + +	 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); +	 OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ +		   DSTORG_VERT_BIAS(0x8) | /* .5 */ +		   LOD_PRECLAMP_OGL | +		   TEX_DEFAULT_COLOR_OGL | +		   cformat | +		   zformat ); +      } +   } + +#if 01 +      /* texture images */ +      /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ +      if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) +      { +         const uint nr = i915->current.sampler_enable_nr; +         if (nr) { +            const uint enabled = i915->current.sampler_enable_flags; +            uint unit; +            uint count = 0; +            OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); +            OUT_BATCH(enabled); +            for (unit = 0; unit < I915_TEX_UNITS; unit++) { +               if (enabled & (1 << unit)) { +                  struct pipe_buffer *buf = +                     i915->texture[unit]->buffer; +                  uint offset = 0; +                  assert(buf); + +                  count++; + +                  OUT_RELOC(buf, +                            I915_BUFFER_ACCESS_READ, +                            offset); +                  OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ +                  OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ +               } +            } +            assert(count == nr); +         } +      } +#endif + +#if 01 +   /* samplers */ +   /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ +   if (i915->hardware_dirty & I915_HW_SAMPLER)  +   { +      if (i915->current.sampler_enable_nr) { +	 int i; +	  +	 OUT_BATCH( _3DSTATE_SAMPLER_STATE |  +		    (3 * i915->current.sampler_enable_nr) ); + +	 OUT_BATCH( i915->current.sampler_enable_flags ); + +	 for (i = 0; i < I915_TEX_UNITS; i++) { +	    if (i915->current.sampler_enable_flags & (1<<i)) { +	       OUT_BATCH( i915->current.sampler[i][0] ); +	       OUT_BATCH( i915->current.sampler[i][1] ); +	       OUT_BATCH( i915->current.sampler[i][2] ); +	    } +	 } +      } +   } +#endif + +   /* constants */ +   /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ +   if (i915->hardware_dirty & I915_HW_PROGRAM) +   { +      /* Collate the user-defined constants with the fragment shader's +       * immediates according to the constant_flags[] array. 
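+       * Entries flagged I915_CONSTFLAG_USER are taken from current.constants[];
+       * all others use the immediate values produced at shader translation time.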
+       */ +      const uint nr = i915->fs->num_constants; +      if (nr) { +         uint i; + +         OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); +         OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + +         for (i = 0; i < nr; i++) { +            const uint *c; +            if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { +               /* grab user-defined constant */ +               c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; +            } +            else { +               /* emit program constant */ +               c = (uint *) i915->fs->constants[i]; +            } +#if 0 /* debug */ +            { +               float *f = (float *) c; +               printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], +                      (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER +                       ? "user" : "immediate")); +            } +#endif +            OUT_BATCH(*c++); +            OUT_BATCH(*c++); +            OUT_BATCH(*c++); +            OUT_BATCH(*c++); +         } +      } +   } + +   /* Fragment program */ +   /* i915->current.program_len dwords, 0 relocs */ +   if (i915->hardware_dirty & I915_HW_PROGRAM) +   { +      uint i; +      /* we should always have, at least, a pass-through program */ +      assert(i915->fs->program_len > 0); +      for (i = 0; i < i915->fs->program_len; i++) { +         OUT_BATCH(i915->fs->program[i]); +      } +   } + +   /* drawing surface size */ +   /* 6 dwords, 0 relocs */ +   { +      uint w, h; +      boolean k = framebuffer_size(&i915->framebuffer, &w, &h); +      (void)k; +      assert(k); + +      OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); +      OUT_BATCH(0); +      OUT_BATCH(0); +      OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); +      OUT_BATCH(0); +      OUT_BATCH(0); +   } + + +   i915->hardware_dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c new file mode 100644 index 0000000000..8c16bb4e27 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -0,0 +1,225 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +  +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "util/u_memory.h" + + +/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. + * Would like to opportunistically recombine all these fragments into + * a single packet containing only what has changed, but for now emit + * as multiple packets. + */ + + + + +/*********************************************************************** + * S0,S1: Vertex buffer state.   + */ +static void upload_S0S1(struct i915_context *i915) +{ +   unsigned LIS0, LIS1; + +   /* INTEL_NEW_VBO */ +   /* TODO: re-use vertex buffers here? */ +   LIS0 = i915->vbo_offset; + +   /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated!  +    */ +   { +      unsigned vertex_size = i915->current.vertex_info.size; + +      LIS1 = ((vertex_size << 24) | +	      (vertex_size << 16)); +   } + +   /* INTEL_NEW_VBO */ +   /* TODO: use a vertex generation number to track vbo changes */ +   if (1 || +       i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || +       i915->current.immediate[I915_IMMEDIATE_S1] != LIS1)  +   { +      i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; +      i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; +      i915->hardware_dirty |= I915_HW_IMMEDIATE; +   } +} + +const struct i915_tracked_state i915_upload_S0S1 = { +   I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, +   upload_S0S1 +}; + + + + +/*********************************************************************** + * S4: Vertex format, rasterization state + */ +static void upload_S2S4(struct i915_context *i915) +{ +   unsigned LIS2, LIS4; + +   /* I915_NEW_VERTEX_FORMAT */ +   { +      LIS2 = i915->current.vertex_info.hwfmt[1]; +      LIS4 = i915->current.vertex_info.hwfmt[0]; +      /* +      debug_printf("LIS2: 0x%x  LIS4: 0x%x\n", LIS2, LIS4); +      */ +      assert(LIS4); /* should never be zero? 
*/ +   } + +   LIS4 |= i915->rasterizer->LIS4; + +   if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || +       LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { + +      i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; +      i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; +      i915->hardware_dirty |= I915_HW_IMMEDIATE; +   } +} + + +const struct i915_tracked_state i915_upload_S2S4 = { +   I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, +   upload_S2S4 +}; + + + +/*********************************************************************** + *  + */ +static void upload_S5( struct i915_context *i915 ) +{ +   unsigned LIS5 = 0; + +   LIS5 |= i915->depth_stencil->stencil_LIS5; + +   LIS5 |= i915->blend->LIS5; + +#if 0 +   /* I915_NEW_RASTERIZER */ +   if (i915->state.Polygon->OffsetFill) { +      LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; +   } +#endif + + +   if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { +      i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; +      i915->hardware_dirty |= I915_HW_IMMEDIATE; +   } +} + +const struct i915_tracked_state i915_upload_S5 = { +   (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), +   upload_S5 +}; + + +/*********************************************************************** + */ +static void upload_S6( struct i915_context *i915 ) +{ +   unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); + +   /* I915_NEW_FRAMEBUFFER +    */ +   if (i915->framebuffer.cbufs[0]) +      LIS6 |= S6_COLOR_WRITE_ENABLE; + +   /* I915_NEW_BLEND +    */ +   LIS6 |= i915->blend->LIS6; + +   /* I915_NEW_DEPTH +    */ +   LIS6 |= i915->depth_stencil->depth_LIS6; + +   if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { +      i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; +      i915->hardware_dirty |= I915_HW_IMMEDIATE; +   } +} + +const struct i915_tracked_state i915_upload_S6 = { +   I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, +   upload_S6 +}; + + +/*********************************************************************** + */ +static void upload_S7( struct i915_context *i915 ) +{ +   unsigned LIS7; + +   /* I915_NEW_RASTERIZER +    */ +   LIS7 = i915->rasterizer->LIS7; + +   if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { +      i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; +      i915->hardware_dirty |= I915_HW_IMMEDIATE; +   } +} + +const struct i915_tracked_state i915_upload_S7 = { +   I915_NEW_RASTERIZER, +   upload_S7 +}; + + +static const struct i915_tracked_state *atoms[] = { +   &i915_upload_S0S1, +   &i915_upload_S2S4, +   &i915_upload_S5, +   &i915_upload_S6, +   &i915_upload_S7 +}; + +/*  + */ +void i915_update_immediate( struct i915_context *i915 ) +{ +   int i; + +   for (i = 0; i < Elements(atoms); i++) +      if (i915->dirty & atoms[i]->dirty) +	 atoms[i]->update( i915 ); +} diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h new file mode 100644 index 0000000000..378de8f9c4 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -0,0 +1,230 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef I915_STATE_INLINES_H +#define I915_STATE_INLINES_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "i915_reg.h" + + +static INLINE unsigned +i915_translate_compare_func(unsigned func) +{ +   switch (func) { +   case PIPE_FUNC_NEVER: +      return COMPAREFUNC_NEVER; +   case PIPE_FUNC_LESS: +      return COMPAREFUNC_LESS; +   case PIPE_FUNC_LEQUAL: +      return COMPAREFUNC_LEQUAL; +   case PIPE_FUNC_GREATER: +      return COMPAREFUNC_GREATER; +   case PIPE_FUNC_GEQUAL: +      return COMPAREFUNC_GEQUAL; +   case PIPE_FUNC_NOTEQUAL: +      return COMPAREFUNC_NOTEQUAL; +   case PIPE_FUNC_EQUAL: +      return COMPAREFUNC_EQUAL; +   case PIPE_FUNC_ALWAYS: +      return COMPAREFUNC_ALWAYS; +   default: +      return COMPAREFUNC_ALWAYS; +   } +} + +static INLINE unsigned +i915_translate_stencil_op(unsigned op) +{ +   switch (op) { +   case PIPE_STENCIL_OP_KEEP: +      return STENCILOP_KEEP; +   case PIPE_STENCIL_OP_ZERO: +      return STENCILOP_ZERO; +   case PIPE_STENCIL_OP_REPLACE: +      return STENCILOP_REPLACE; +   case PIPE_STENCIL_OP_INCR: +      return STENCILOP_INCRSAT; +   case PIPE_STENCIL_OP_DECR: +      return STENCILOP_DECRSAT; +   case PIPE_STENCIL_OP_INCR_WRAP: +      return STENCILOP_INCR; +   case PIPE_STENCIL_OP_DECR_WRAP: +      return STENCILOP_DECR; +   case PIPE_STENCIL_OP_INVERT: +      return STENCILOP_INVERT; +   default: +      return STENCILOP_ZERO; +   } +} + +static INLINE unsigned +i915_translate_blend_factor(unsigned factor) +{ +   switch (factor) { +   case PIPE_BLENDFACTOR_ZERO: +      return BLENDFACT_ZERO; +   case PIPE_BLENDFACTOR_SRC_ALPHA: +      return BLENDFACT_SRC_ALPHA; +   case PIPE_BLENDFACTOR_ONE: +      return BLENDFACT_ONE; +   case PIPE_BLENDFACTOR_SRC_COLOR: +      return BLENDFACT_SRC_COLR; +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      return BLENDFACT_INV_SRC_COLR; +   case PIPE_BLENDFACTOR_DST_COLOR: +      return BLENDFACT_DST_COLR; +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      return BLENDFACT_INV_DST_COLR; +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +      return BLENDFACT_INV_SRC_ALPHA; +   case PIPE_BLENDFACTOR_DST_ALPHA: +      return BLENDFACT_DST_ALPHA; +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: +      return BLENDFACT_INV_DST_ALPHA; +   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +      return 
BLENDFACT_SRC_ALPHA_SATURATE; +   case PIPE_BLENDFACTOR_CONST_COLOR: +      return BLENDFACT_CONST_COLOR; +   case PIPE_BLENDFACTOR_INV_CONST_COLOR: +      return BLENDFACT_INV_CONST_COLOR; +   case PIPE_BLENDFACTOR_CONST_ALPHA: +      return BLENDFACT_CONST_ALPHA; +   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +      return BLENDFACT_INV_CONST_ALPHA; +   default: +      return BLENDFACT_ZERO; +   } +} + +static INLINE unsigned +i915_translate_blend_func(unsigned mode) +{ +   switch (mode) { +   case PIPE_BLEND_ADD: +      return BLENDFUNC_ADD; +   case PIPE_BLEND_MIN: +      return BLENDFUNC_MIN; +   case PIPE_BLEND_MAX: +      return BLENDFUNC_MAX; +   case PIPE_BLEND_SUBTRACT: +      return BLENDFUNC_SUBTRACT; +   case PIPE_BLEND_REVERSE_SUBTRACT: +      return BLENDFUNC_REVERSE_SUBTRACT; +   default: +      return 0; +   } +} + + +static INLINE unsigned +i915_translate_logic_op(unsigned opcode) +{ +   switch (opcode) { +   case PIPE_LOGICOP_CLEAR: +      return LOGICOP_CLEAR; +   case PIPE_LOGICOP_AND: +      return LOGICOP_AND; +   case PIPE_LOGICOP_AND_REVERSE: +      return LOGICOP_AND_RVRSE; +   case PIPE_LOGICOP_COPY: +      return LOGICOP_COPY; +   case PIPE_LOGICOP_COPY_INVERTED: +      return LOGICOP_COPY_INV; +   case PIPE_LOGICOP_AND_INVERTED: +      return LOGICOP_AND_INV; +   case PIPE_LOGICOP_NOOP: +      return LOGICOP_NOOP; +   case PIPE_LOGICOP_XOR: +      return LOGICOP_XOR; +   case PIPE_LOGICOP_OR: +      return LOGICOP_OR; +   case PIPE_LOGICOP_OR_INVERTED: +      return LOGICOP_OR_INV; +   case PIPE_LOGICOP_NOR: +      return LOGICOP_NOR; +   case PIPE_LOGICOP_EQUIV: +      return LOGICOP_EQUIV; +   case PIPE_LOGICOP_INVERT: +      return LOGICOP_INV; +   case PIPE_LOGICOP_OR_REVERSE: +      return LOGICOP_OR_RVRSE; +   case PIPE_LOGICOP_NAND: +      return LOGICOP_NAND; +   case PIPE_LOGICOP_SET: +      return LOGICOP_SET; +   default: +      return LOGICOP_SET; +   } +} + + + +static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr ) +{ +   boolean ok; + +   switch (hw_prim) { +   case PRIM3D_POINTLIST: +      ok = (nr >= 1); +      assert(ok); +      break; +   case PRIM3D_LINELIST: +      ok = (nr >= 2) && (nr % 2) == 0; +      assert(ok); +      break; +   case PRIM3D_LINESTRIP: +      ok = (nr >= 2); +      assert(ok); +      break; +   case PRIM3D_TRILIST: +      ok = (nr >= 3) && (nr % 3) == 0; +      assert(ok); +      break; +   case PRIM3D_TRISTRIP: +      ok = (nr >= 3); +      assert(ok); +      break; +   case PRIM3D_TRIFAN: +      ok = (nr >= 3); +      assert(ok); +      break; +   case PRIM3D_POLY: +      ok = (nr >= 3); +      assert(ok); +      break; +   default: +      assert(0); +      ok = 0; +      break; +   } + +   return ok; +} + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c new file mode 100644 index 0000000000..c09c10601b --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -0,0 +1,299 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "i915_state_inlines.h"
+#include "i915_context.h"
+#include "i915_reg.h"
+#include "i915_state.h"
+
+
+/*
+ * A note about min_lod & max_lod.
+ *
+ * There is a circular dependency between the sampler state
+ * and the map state to be submitted to hw.
+ *
+ * Two conditions must be met:
+ * min_lod <= max_lod == true
+ * max_lod <= last_level == true
+ *
+ *
+ * This would all be fine and dandy if it weren't for the fact that max_lod
+ * is set on the map state instead of the sampler state.  That is, the
+ * max_lod we submit on the map is:
+ * max_lod = MIN2(last_level, max_lod);
+ *
+ * So we need to update the map state when we change samplers, and we
+ * need to change the sampler state when the map state is changed.
+ * The first part is done by calling i915_update_texture in
+ * i915_update_samplers, and the second part is done elsewhere in the
+ * code tracking the state changes.
+ */
+
+static void
+i915_update_texture(struct i915_context *i915,
+                    uint unit,
+                    const struct i915_texture *tex,
+                    const struct i915_sampler_state *sampler,
+                    uint state[6]);
+/**
+ * Compute i915 texture sampling state.
+ *
+ * Recalculate all state from scratch.  Perhaps not the most
+ * efficient, but this has gotten complex enough that we need
+ * something which is understandable and reliable.
+ * \param state  returns the 3 words of computed state
+ */
+static void update_sampler(struct i915_context *i915,
+                           uint unit,
+                           const struct i915_sampler_state *sampler,
+                           const struct i915_texture *tex,
+                           unsigned state[3] )
+{
+   const struct pipe_texture *pt = &tex->base;
+   unsigned minlod, lastlod;
+
+   /* Need to do this after updating the maps, which call the
+    * intel_finalize_mipmap_tree and hence can update firstLevel:
+    */
+   state[0] = sampler->state[0];
+   state[1] = sampler->state[1];
+   state[2] = sampler->state[2];
+
+   if (pt->format == PIPE_FORMAT_YCBCR ||
+       pt->format == PIPE_FORMAT_YCBCR_REV)
+      state[0] |= SS2_COLORSPACE_CONVERSION;
+
+   /* 3D textures don't seem to respect the border color.
+    * Fallback if there's ever a danger that they might refer to
+    * it.
+    *
+    * Effectively this means fallback on 3D clamp or
+    * clamp_to_border.
+    *
+    * XXX: Check if this is true on i945.
+    * XXX: Check if this bug got fixed in release silicon.
+    */
+#if 0
+   {
+      const unsigned ws = sampler->templ->wrap_s;
+      const unsigned wt = sampler->templ->wrap_t;
+      const unsigned wr = sampler->templ->wrap_r;
+      if (pt->target == PIPE_TEXTURE_3D &&
+          (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
+           sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) &&
+          (ws == PIPE_TEX_WRAP_CLAMP ||
+           wt == PIPE_TEX_WRAP_CLAMP ||
+           wr == PIPE_TEX_WRAP_CLAMP ||
+           ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
+           wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
+           wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) {
+         if (i915->strict_conformance) {
+            assert(0);
+            /* 	    sampler->fallback = true; */
+            /* TODO */
+         }
+      }
+   }
+#endif
+
+   /* See note at the top of file */
+   minlod = sampler->minlod;
+   lastlod = pt->last_level << 4;
+
+   if (lastlod < minlod) {
+      minlod = lastlod;
+   }
+
+   state[1] |= (minlod << SS3_MIN_LOD_SHIFT);   /* use the clamped value */
+   state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
+}
+
+
+void i915_update_samplers( struct i915_context *i915 )
+{
+   uint unit;
+
+   i915->current.sampler_enable_nr = 0;
+   i915->current.sampler_enable_flags = 0x0;
+
+   for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers;
+        unit++) {
+      /* determine unit enable/disable by looking for a bound texture */
+      /* could also examine the fragment program? */
+      if (i915->texture[unit]) {
+	 update_sampler( i915,
+	                 unit,
+	                 i915->sampler[unit],       /* sampler state */
+	                 i915->texture[unit],        /* texture */
+	                 i915->current.sampler[unit] /* the result */
+	                 );
+	 i915_update_texture( i915,
+	                      unit,
+	                      i915->texture[unit],          /* texture */
+	                      i915->sampler[unit],          /* sampler state */
+	                      i915->current.texbuffer[unit] );
+
+	 i915->current.sampler_enable_nr++;
+	 i915->current.sampler_enable_flags |= (1 << unit);
+      }
+   }
+
+   i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP;
+}
+
+
+static uint
+translate_texture_format(enum pipe_format pipeFormat)
+{
+   switch (pipeFormat) {
+   case PIPE_FORMAT_L8_UNORM:
+      return MAPSURF_8BIT | MT_8BIT_L8;
+   case PIPE_FORMAT_I8_UNORM:
+      return MAPSURF_8BIT | MT_8BIT_I8;
+   case PIPE_FORMAT_A8_UNORM:
+      return MAPSURF_8BIT | MT_8BIT_A8;
+   case PIPE_FORMAT_A8L8_UNORM:
+      return MAPSURF_16BIT | MT_16BIT_AY88;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      return MAPSURF_16BIT | MT_16BIT_RGB565;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      return MAPSURF_16BIT | MT_16BIT_ARGB1555;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      return MAPSURF_16BIT | MT_16BIT_ARGB4444;
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+   case PIPE_FORMAT_YCBCR_REV:
+      return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
+   case PIPE_FORMAT_YCBCR:
+      return (MAPSURF_422 | MT_422_YCRCB_SWAPY);
+#if 0
+   case PIPE_FORMAT_RGB_FXT1:
+   case PIPE_FORMAT_RGBA_FXT1:
+      return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
+#endif
+   case PIPE_FORMAT_Z16_UNORM:
+      return (MAPSURF_16BIT | MT_16BIT_L16);
+#if 0
+   case 
PIPE_FORMAT_RGBA_DXT1: +   case PIPE_FORMAT_RGB_DXT1: +      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); +   case PIPE_FORMAT_RGBA_DXT3: +      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); +   case PIPE_FORMAT_RGBA_DXT5: +      return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); +#endif +   case PIPE_FORMAT_S8Z24_UNORM: +      return (MAPSURF_32BIT | MT_32BIT_xI824); +   default: +      debug_printf("i915: translate_texture_format() bad image format %x\n", +              pipeFormat); +      assert(0); +      return 0; +   } +} + + +static void +i915_update_texture(struct i915_context *i915, +                    uint unit, +                    const struct i915_texture *tex, +                    const struct i915_sampler_state *sampler, +                    uint state[6]) +{ +   const struct pipe_texture *pt = &tex->base; +   uint format, pitch; +   const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; +   const uint num_levels = pt->last_level; +   unsigned max_lod = num_levels * 4; +   unsigned tiled = MS3_USE_FENCE_REGS; + +   assert(tex); +   assert(width); +   assert(height); +   assert(depth); + +   format = translate_texture_format(pt->format); +   pitch = tex->stride; + +   assert(format); +   assert(pitch); + +   if (tex->tiled) { +      assert(!((pitch - 1) & pitch)); +      tiled = MS3_TILED_SURFACE; +   } + +   /* MS3 state */ +   state[0] = +      (((height - 1) << MS3_HEIGHT_SHIFT) +       | ((width - 1) << MS3_WIDTH_SHIFT) +       | format +       | tiled); + +   /* +    * XXX When min_filter != mag_filter and there's just one mipmap level, +    * set max_lod = 1 to make sure i915 chooses between min/mag filtering. +    */ + +   /* See note at the top of file */ +   if (max_lod > (sampler->maxlod >> 2)) +      max_lod = sampler->maxlod >> 2; + +   /* MS4 state */ +   state[1] = +      ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) +       | MS4_CUBE_FACE_ENA_MASK +       | ((max_lod) << MS4_MAX_LOD_SHIFT) +       | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); +} + + +void +i915_update_textures(struct i915_context *i915) +{ +   uint unit; + +   for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; +        unit++) { +      /* determine unit enable/disable by looking for a bound texture */ +      /* could also examine the fragment program? */ +      if (i915->texture[unit]) { +	 i915_update_texture( i915, +	                      unit, +	                      i915->texture[unit],          /* texture */ +	                      i915->sampler[unit],          /* sampler state */ +	                      i915->current.texbuffer[unit] ); +      } +   } + +   i915->hardware_dirty |= I915_HW_MAP; +} diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c new file mode 100644 index 0000000000..94e2deaf61 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -0,0 +1,126 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i915_context.h"
+#include "i915_blit.h"
+#include "i915_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/internal/p_winsys_screen.h"
+#include "util/u_tile.h"
+#include "util/u_rect.h"
+
+
+/* Assumes all values are within bounds -- no checking at this level -
+ * do it higher up if required.
+ */
+static void
+i915_surface_copy(struct pipe_context *pipe,
+                  boolean do_flip,
+                  struct pipe_surface *dst,
+                  unsigned dstx, unsigned dsty,
+                  struct pipe_surface *src,
+                  unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+   assert( dst != src );
+   assert( dst->block.size == src->block.size );
+   assert( dst->block.width == src->block.width );
+   assert( dst->block.height == src->block.height );
+
+   if (0) {
+      void *dst_map = pipe->screen->surface_map( pipe->screen,
+                                                 dst,
+                                                 PIPE_BUFFER_USAGE_CPU_WRITE );
+
+      const void *src_map = pipe->screen->surface_map( pipe->screen,
+                                                       src,
+                                                       PIPE_BUFFER_USAGE_CPU_READ );
+
+      pipe_copy_rect(dst_map,
+                     &dst->block,
+                     dst->stride,
+                     dstx, dsty,
+                     width, height,
+                     src_map,
+                     do_flip ? -(int) src->stride : src->stride,
+                     srcx, do_flip ? 
height - 1 - srcy : srcy); + +      pipe->screen->surface_unmap(pipe->screen, src); +      pipe->screen->surface_unmap(pipe->screen, dst); +   } +   else { +      struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; +      struct i915_texture *src_tex = (struct i915_texture *)src->texture; +      assert(dst->block.width == 1); +      assert(dst->block.height == 1); +      i915_copy_blit( i915_context(pipe), +                      do_flip, +                      dst->block.size, +		      (unsigned short) src->stride, src_tex->buffer, src->offset, +		      (unsigned short) dst->stride, dst_tex->buffer, dst->offset, +		      (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); +   } +} + + +static void +i915_surface_fill(struct pipe_context *pipe, +		  struct pipe_surface *dst, +		  unsigned dstx, unsigned dsty, +		  unsigned width, unsigned height, unsigned value) +{ +   if (0) { +      void *dst_map = pipe->screen->surface_map( pipe->screen, +                                                 dst, +                                                 PIPE_BUFFER_USAGE_CPU_WRITE ); + +      pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); + +      pipe->screen->surface_unmap(pipe->screen, dst); +   } +   else { +      struct i915_texture *tex = (struct i915_texture *)dst->texture; +      assert(dst->block.width == 1); +      assert(dst->block.height == 1); +      i915_fill_blit( i915_context(pipe), +		      dst->block.size, +		      (unsigned short) dst->stride, +		      tex->buffer, dst->offset, +		      (short) dstx, (short) dsty, +		      (short) width, (short) height, +		      value ); +   } +} + + +void +i915_init_surface_functions(struct i915_context *i915) +{ +   i915->pipe.surface_copy = i915_surface_copy; +   i915->pipe.surface_fill = i915_surface_fill; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c new file mode 100644 index 0000000000..b2ca3a2286 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -0,0 +1,771 @@ +/************************************************************************** + *  + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
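The surface_copy/surface_fill hooks installed above are the i915's entire 2D path: the disabled if (0) branches show the mapped software fallback through pipe_copy_rect()/pipe_fill_rect(), while the live branches go through the blitter and therefore require 1x1-block (uncompressed) surfaces. A hedged usage sketch follows; the surfaces and coordinates are invented for illustration.

   /* Illustrative only: copy a 64x64 region between two uncompressed
    * surfaces through the pipe_context hook installed by
    * i915_init_surface_functions().  FALSE disables the vertical flip.
    */
   static void
   copy_region_example(struct pipe_context *pipe,
                       struct pipe_surface *dst,
                       struct pipe_surface *src)
   {
      pipe->surface_copy(pipe, FALSE,
                         dst, 0, 0,    /* dstx, dsty */
                         src, 0, 0,    /* srcx, srcy */
                         64, 64);      /* width, height */
   }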
+ *  + **************************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  *   Michel Dänzer <michel@tungstengraphics.com> +  */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_texture.h" +#include "i915_debug.h" +#include "i915_screen.h" + +/* + * Helper function and arrays + */ + +/** + * Initial offset for Cube map. + */ +static const int initial_offsets[6][2] = { +   {0, 0}, +   {0, 2}, +   {1, 0}, +   {1, 2}, +   {1, 1}, +   {1, 3} +}; + +/** + * Step offsets for Cube map. + */ +static const int step_offsets[6][2] = { +   {0, 2}, +   {0, 2}, +   {-1, 2}, +   {-1, 2}, +   {-1, 1}, +   {-1, 1} +}; + +static unsigned minify( unsigned d ) +{ +   return MAX2(1, d>>1); +} + +static unsigned +power_of_two(unsigned x) +{ +   unsigned value = 1; +   while (value < x) +      value = value << 1; +   return value; +} + +static unsigned +round_up(unsigned n, unsigned multiple) +{ +   return (n + multiple - 1) & ~(multiple - 1); +} + + +/* + * More advanced helper funcs + */ + + +static void +i915_miptree_set_level_info(struct i915_texture *tex, +                             unsigned level, +                             unsigned nr_images, +                             unsigned w, unsigned h, unsigned d) +{ +   struct pipe_texture *pt = &tex->base; + +   assert(level < PIPE_MAX_TEXTURE_LEVELS); + +   pt->width[level] = w; +   pt->height[level] = h; +   pt->depth[level] = d; +    +   pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); +   pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); + +   tex->nr_images[level] = nr_images; + +   /* +   DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, +       level, w, h, d, x, y, tex->level_offset[level]); +   */ + +   /* Not sure when this would happen, but anyway:  +    */ +   if (tex->image_offset[level]) { +      FREE(tex->image_offset[level]); +      tex->image_offset[level] = NULL; +   } + +   assert(nr_images); +   assert(!tex->image_offset[level]); + +   tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); +   tex->image_offset[level][0] = 0; +} + +static void +i915_miptree_set_image_offset(struct i915_texture *tex, +			      unsigned level, unsigned img, unsigned x, unsigned y) +{ +   if (img == 0 && level == 0) +      assert(x == 0 && y == 0); + +   assert(img < tex->nr_images[level]); + +   tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; + +   /* +   printf("%s level %d img %d pos %d,%d image_offset %x\n", +       __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); +   */ +} + + +/* + * Layout functions + */ + + +/** + * Special case to deal with display targets. 
+ */ +static boolean +i915_displaytarget_layout(struct i915_texture *tex) +{ +   struct pipe_texture *pt = &tex->base; + +   if (pt->last_level > 0 || pt->block.size != 4) +      return 0; + +   i915_miptree_set_level_info( tex, 0, 1, +                                tex->base.width[0], +                                tex->base.height[0], +                                1 ); +   i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); + +   if (tex->base.width[0] >= 128) { +      tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); +      tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); +#if 0 /* used for tiled display targets */ +      tex->tiled = 1; +#endif +   } else { +      tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64); +      tex->total_nblocksy = tex->base.nblocksy[0]; +   } + +   /* +   printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, +      tex->base.width[0], tex->base.height[0], pt->block.size, +      tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); +   */ + +   return 1; +} + +static void +i945_miptree_layout_2d( struct i915_texture *tex ) +{ +   struct pipe_texture *pt = &tex->base; +   const int align_x = 2, align_y = 4; +   unsigned level; +   unsigned x = 0; +   unsigned y = 0; +   unsigned width = pt->width[0]; +   unsigned height = pt->height[0]; +   unsigned nblocksx = pt->nblocksx[0]; +   unsigned nblocksy = pt->nblocksy[0]; + +   /* used for tiled display targets */ +   if (0) +      if (i915_displaytarget_layout(tex)) +	 return; + +   tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + +   /* May need to adjust pitch to accomodate the placement of +    * the 2nd mipmap level.  This occurs when the alignment +    * constraints of mipmap placement push the right edge of the +    * 2nd mipmap level out past the width of its parent. +    */ +   if (pt->last_level > 0) { +      unsigned mip1_nblocksx  +	 = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) +         + pf_get_nblocksx(&pt->block, minify(minify(width))); + +      if (mip1_nblocksx > nblocksx) +	 tex->stride = mip1_nblocksx * pt->block.size; +   } + +   /* Pitch must be a whole number of dwords +    */ +   tex->stride = align(tex->stride, 64); +   tex->total_nblocksy = 0; + +   for (level = 0; level <= pt->last_level; level++) { +      i915_miptree_set_level_info(tex, level, 1, width, height, 1); +      i915_miptree_set_image_offset(tex, level, 0, x, y); + +      nblocksy = align(nblocksy, align_y); + +      /* Because the images are packed better, the final offset +       * might not be the maximal one: +       */ +      tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); + +      /* Layout_below: step right after second mipmap level. 
+       */ +      if (level == 1) { +	 x += align(nblocksx, align_x); +      } +      else { +	 y += nblocksy; +      } + +      width  = minify(width); +      height = minify(height); +      nblocksx = pf_get_nblocksx(&pt->block, width); +      nblocksy = pf_get_nblocksy(&pt->block, height); +   } +} + +static void +i945_miptree_layout_cube(struct i915_texture *tex) +{ +   struct pipe_texture *pt = &tex->base; +   unsigned level; + +   const unsigned nblocks = pt->nblocksx[0]; +   unsigned face; +   unsigned width = pt->width[0]; +   unsigned height = pt->height[0]; + +   /* +   printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); +   */ + +   assert(width == height); /* cubemap images are square */ + +   /* +    * XXX Should only be used for compressed formats. But lets +    * keep this code active just in case. +    * +    * Depending on the size of the largest images, pitch can be +    * determined either by the old-style packing of cubemap faces, +    * or the final row of 4x4, 2x2 and 1x1 faces below this. +    */ +   if (nblocks > 32) +      tex->stride = round_up(nblocks * pt->block.size * 2, 4); +   else +      tex->stride = 14 * 8 * pt->block.size; + +   tex->total_nblocksy = nblocks * 4; + +   /* Set all the levels to effectively occupy the whole rectangular region. +   */ +   for (level = 0; level <= pt->last_level; level++) { +      i915_miptree_set_level_info(tex, level, 6, width, height, 1); +      width /= 2; +      height /= 2; +   } + +   for (face = 0; face < 6; face++) { +      unsigned x = initial_offsets[face][0] * nblocks; +      unsigned y = initial_offsets[face][1] * nblocks; +      unsigned d = nblocks; + +#if 0 /* Fix and enable this code for compressed formats */ +      if (nblocks == 4 && face >= 4) { +         y = tex->total_height - 4; +         x = (face - 4) * 8; +      } +      else if (nblocks < 4 && (face > 0)) { +         y = tex->total_height - 4; +         x = face * 8; +      } +#endif + +      for (level = 0; level <= pt->last_level; level++) { +         i915_miptree_set_image_offset(tex, level, face, x, y); + +         d >>= 1; + +#if 0 /* Fix and enable this code for compressed formats */ +         switch (d) { +            case 4: +               switch (face) { +                  case PIPE_TEX_FACE_POS_X: +                  case PIPE_TEX_FACE_NEG_X: +                     x += step_offsets[face][0] * d; +                     y += step_offsets[face][1] * d; +                     break; +                  case PIPE_TEX_FACE_POS_Y: +                  case PIPE_TEX_FACE_NEG_Y: +                     y += 12; +                     x -= 8; +                     break; +                  case PIPE_TEX_FACE_POS_Z: +                  case PIPE_TEX_FACE_NEG_Z: +                     y = tex->total_height - 4; +                     x = (face - 4) * 8; +                     break; +               } +            case 2: +               y = tex->total_height - 4; +               x = 16 + face * 8; +               break; + +            case 1: +               x += 48; +               break; +            default: +#endif +               x += step_offsets[face][0] * d; +               y += step_offsets[face][1] * d; +#if 0 +               break; +         } +#endif +      } +   } +} + +static boolean +i915_miptree_layout(struct i915_texture * tex) +{ +   struct pipe_texture *pt = &tex->base; +   unsigned level; + +   switch (pt->target) { +   case PIPE_TEXTURE_CUBE: { +         const unsigned nblocks = pt->nblocksx[0]; +         unsigned face; +        
 unsigned width = pt->width[0], height = pt->height[0]; + +         assert(width == height); /* cubemap images are square */ + +         /* double pitch for cube layouts */ +         tex->stride = round_up(nblocks * pt->block.size * 2, 4); +         tex->total_nblocksy = nblocks * 4; + +         for (level = 0; level <= pt->last_level; level++) { +            i915_miptree_set_level_info(tex, level, 6, +                                         width, height, +                                         1); +            width /= 2; +            height /= 2; +         } + +         for (face = 0; face < 6; face++) { +            unsigned x = initial_offsets[face][0] * nblocks; +            unsigned y = initial_offsets[face][1] * nblocks; +            unsigned d = nblocks; + +            for (level = 0; level <= pt->last_level; level++) { +               i915_miptree_set_image_offset(tex, level, face, x, y); +               d >>= 1; +               x += step_offsets[face][0] * d; +               y += step_offsets[face][1] * d; +            } +         } +         break; +      } +   case PIPE_TEXTURE_3D:{ +         unsigned width = pt->width[0]; +         unsigned height = pt->height[0]; +         unsigned depth = pt->depth[0]; +         unsigned nblocksx = pt->nblocksx[0]; +         unsigned nblocksy = pt->nblocksy[0]; +         unsigned stack_nblocksy = 0; + +         /* Calculate the size of a single slice.  +          */ +         tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + +         /* XXX: hardware expects/requires 9 levels at minimum. +          */ +         for (level = 0; level <= MAX2(8, pt->last_level); +              level++) { +            i915_miptree_set_level_info(tex, level, depth, +                                        width, height, depth); + + +            stack_nblocksy += MAX2(2, nblocksy); + +            width = minify(width); +            height = minify(height); +            depth = minify(depth); +            nblocksx = pf_get_nblocksx(&pt->block, width); +            nblocksy = pf_get_nblocksy(&pt->block, height); +         } + +         /* Fixup depth image_offsets:  +          */ +         depth = pt->depth[0]; +         for (level = 0; level <= pt->last_level; level++) { +            unsigned i; +            for (i = 0; i < depth; i++)  +               i915_miptree_set_image_offset(tex, level, i, +                                             0, i * stack_nblocksy); + +            depth = minify(depth); +         } + + +         /* Multiply slice size by texture depth for total size.  It's +          * remarkable how wasteful of memory the i915 texture layouts +          * are.  They are largely fixed in the i945. 
+          */ +         tex->total_nblocksy = stack_nblocksy * pt->depth[0]; +         break; +      } + +   default:{ +         unsigned width = pt->width[0]; +         unsigned height = pt->height[0]; +         unsigned nblocksx = pt->nblocksx[0]; +         unsigned nblocksy = pt->nblocksy[0]; + +         tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); +         tex->total_nblocksy = 0; + +         for (level = 0; level <= pt->last_level; level++) { +            i915_miptree_set_level_info(tex, level, 1, +                                        width, height, 1); +            i915_miptree_set_image_offset(tex, level, 0, +                                          0, tex->total_nblocksy); + +            nblocksy = round_up(MAX2(2, nblocksy), 2); + +	    tex->total_nblocksy += nblocksy; + +            width = minify(width); +            height = minify(height); +            nblocksx = pf_get_nblocksx(&pt->block, width); +            nblocksy = pf_get_nblocksy(&pt->block, height); +         } +         break; +      } +   } +   /* +   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, +       tex->pitch, +       tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); +   */ + +   return TRUE; +} + + +static boolean +i945_miptree_layout(struct i915_texture * tex) +{ +   struct pipe_texture *pt = &tex->base; +   unsigned level; + +   switch (pt->target) { +   case PIPE_TEXTURE_CUBE: +      i945_miptree_layout_cube(tex); +      break; +   case PIPE_TEXTURE_3D:{ +         unsigned width = pt->width[0]; +         unsigned height = pt->height[0]; +         unsigned depth = pt->depth[0]; +         unsigned nblocksx = pt->nblocksx[0]; +         unsigned nblocksy = pt->nblocksy[0]; +         unsigned pack_x_pitch, pack_x_nr; +         unsigned pack_y_pitch; + +         tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); +         tex->total_nblocksy = 0; + +         pack_y_pitch = MAX2(pt->nblocksy[0], 2); +         pack_x_pitch = tex->stride / pt->block.size; +         pack_x_nr = 1; + +         for (level = 0; level <= pt->last_level; level++) { +            unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? 
depth : 6; +            int x = 0; +            int y = 0; +            unsigned q, j; + +            i915_miptree_set_level_info(tex, level, nr_images, +                                        width, height, depth); + +            for (q = 0; q < nr_images;) { +               for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { +                  i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy); +                  x += pack_x_pitch; +               } + +               x = 0; +               y += pack_y_pitch; +            } + + +            tex->total_nblocksy += y; + +            if (pack_x_pitch > 4) { +               pack_x_pitch >>= 1; +               pack_x_nr <<= 1; +               assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); +            } + +            if (pack_y_pitch > 2) { +               pack_y_pitch >>= 1; +            } + +            width = minify(width); +            height = minify(height); +            depth = minify(depth); +            nblocksx = pf_get_nblocksx(&pt->block, width); +            nblocksy = pf_get_nblocksy(&pt->block, height); +         } +         break; +      } + +   case PIPE_TEXTURE_1D: +   case PIPE_TEXTURE_2D: +//   case PIPE_TEXTURE_RECTANGLE: +         i945_miptree_layout_2d(tex); +         break; +   default: +      assert(0); +      return FALSE; +   } + +   /* +   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, +       tex->pitch, +       tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); +   */ + +   return TRUE; +} + + +static struct pipe_texture * +i915_texture_create(struct pipe_screen *screen, +                    const struct pipe_texture *templat) +{ +   struct i915_screen *i915screen = i915_screen(screen); +   struct pipe_winsys *ws = screen->winsys; +   struct i915_texture *tex = CALLOC_STRUCT(i915_texture); +   size_t tex_size; + +   if (!tex) +      return NULL; + +   tex->base = *templat; +   tex->base.refcount = 1; +   tex->base.screen = screen; + +   tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); +   tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); +    +   if (i915screen->is_i945) { +      if (!i945_miptree_layout(tex)) +	 goto fail; +   } else { +      if (!i915_miptree_layout(tex)) +	 goto fail; +   } + +   tex_size = tex->stride * tex->total_nblocksy; + +   tex->buffer = ws->buffer_create(ws, 64, +                                    PIPE_BUFFER_USAGE_PIXEL, +                                    tex_size); + +   if (!tex->buffer) +      goto fail; + +#if 0 +   void *ptr = ws->buffer_map(ws, tex->buffer, +      PIPE_BUFFER_USAGE_CPU_WRITE); +   memset(ptr, 0x80, tex_size); +   ws->buffer_unmap(ws, tex->buffer); +#endif + +   return &tex->base; + +fail: +   FREE(tex); +   return NULL; +} + + +static void +i915_texture_release(struct pipe_screen *screen, +                     struct pipe_texture **pt) +{ +   if (!*pt) +      return; + +   /* +   DBG("%s %p refcount will be %d\n", +       __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); +   */ +   if (--(*pt)->refcount <= 0) { +      struct i915_texture *tex = (struct i915_texture *)*pt; +      uint i; + +      /* +      DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); +      */ + +      pipe_buffer_reference(screen, &tex->buffer, NULL); + +      for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) +         if (tex->image_offset[i]) +            FREE(tex->image_offset[i]); + +      FREE(tex); +   } +   *pt = NULL; +} + +static struct pipe_surface * 
+i915_get_tex_surface(struct pipe_screen *screen, +                     struct pipe_texture *pt, +                     unsigned face, unsigned level, unsigned zslice, +                     unsigned flags) +{ +   struct i915_texture *tex = (struct i915_texture *)pt; +   struct pipe_surface *ps; +   unsigned offset;  /* in bytes */ + +   if (pt->target == PIPE_TEXTURE_CUBE) { +      offset = tex->image_offset[level][face]; +   } +   else if (pt->target == PIPE_TEXTURE_3D) { +      offset = tex->image_offset[level][zslice]; +   } +   else { +      offset = tex->image_offset[level][0]; +      assert(face == 0); +      assert(zslice == 0); +   } + +   ps = CALLOC_STRUCT(pipe_surface); +   if (ps) { +      ps->refcount = 1; +      pipe_texture_reference(&ps->texture, pt); +      ps->format = pt->format; +      ps->width = pt->width[level]; +      ps->height = pt->height[level]; +      ps->block = pt->block; +      ps->nblocksx = pt->nblocksx[level]; +      ps->nblocksy = pt->nblocksy[level]; +      ps->stride = tex->stride; +      ps->offset = offset; +      ps->usage = flags; +      ps->status = PIPE_SURFACE_STATUS_DEFINED; +   } +   return ps; +} + +static struct pipe_texture * +i915_texture_blanket(struct pipe_screen * screen, +                     const struct pipe_texture *base, +                     const unsigned *stride, +                     struct pipe_buffer *buffer) +{ +   struct i915_texture *tex; +   assert(screen); + +   /* Only supports one type */ +   if (base->target != PIPE_TEXTURE_2D || +       base->last_level != 0 || +       base->depth[0] != 1) { +      return NULL; +   } + +   tex = CALLOC_STRUCT(i915_texture); +   if (!tex) +      return NULL; + +   tex->base = *base; +   tex->base.refcount = 1; +   tex->base.screen = screen; + +   tex->stride = stride[0]; + +   i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); +   i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + +   pipe_buffer_reference(screen, &tex->buffer, buffer); + +   return &tex->base; +} + +void +i915_init_texture_functions(struct i915_context *i915) +{ +//   i915->pipe.texture_update = i915_texture_update; +} + +static void +i915_tex_surface_release(struct pipe_screen *screen, +                         struct pipe_surface **surface) +{ +   struct pipe_surface *surf = *surface; + +   if (--surf->refcount == 0) { + +      /* This really should not be possible, but it's actually +       * happening quite a bit...  Will fix. +       */ +      if (surf->status == PIPE_SURFACE_STATUS_CLEAR) { +         debug_printf("XXX destroying a surface with pending clears...\n"); +         assert(0); +      } + +      pipe_texture_reference(&surf->texture, NULL); +      FREE(surf); +   } + +   *surface = NULL; +} + +void +i915_init_screen_texture_functions(struct pipe_screen *screen) +{ +   screen->texture_create = i915_texture_create; +   screen->texture_release = i915_texture_release; +   screen->get_tex_surface = i915_get_tex_surface; +   screen->texture_blanket = i915_texture_blanket; +   screen->tex_surface_release = i915_tex_surface_release; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h new file mode 100644 index 0000000000..7225016a9f --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -0,0 +1,43 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
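i915_get_tex_surface() above picks the byte offset for the requested face or z-slice out of the per-level image_offset table built by the layout code, so every sub-image is reachable through the same screen hook. A sketch of such a call, with illustrative arguments (the usage flag is only an example of the PIPE_BUFFER_USAGE_* values from p_defines.h):

   /* Illustrative only: request a view of cube face 2, mip level 0. */
   static struct pipe_surface *
   get_cube_face_example(struct pipe_screen *screen,
                         struct pipe_texture *tex)
   {
      return screen->get_tex_surface(screen, tex,
                                     2,   /* face */
                                     0,   /* level */
                                     0,   /* zslice */
                                     PIPE_BUFFER_USAGE_GPU_WRITE);
   }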
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef I915_TEXTURE_H +#define I915_TEXTURE_H + +struct i915_context; +struct pipe_screen; + + +extern void +i915_init_texture_functions(struct i915_context *i915); + + +extern void +i915_init_screen_texture_functions(struct pipe_screen *screen); + + +#endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h new file mode 100644 index 0000000000..81904c2a74 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -0,0 +1,121 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \file + * This is the interface that i915simple requires any window system + * hosting it to implement.  This is the only include file in i915simple + * which is public. + *  + */ + +#ifndef I915_WINSYS_H +#define I915_WINSYS_H + + +#include "pipe/p_defines.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +    +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. 
 The window system provides a buffer manager and a
+ * set of additional hooks for things like command buffer submission,
+ * etc.
+ *
+ * There clearly has to be some agreement between the window system
+ * driver and the hardware driver about the format of command buffers,
+ * etc.
+ */
+
+struct i915_batchbuffer;
+struct pipe_buffer;
+struct pipe_fence_handle;
+struct pipe_winsys;
+struct pipe_screen;
+
+
+/**
+ * Additional winsys interface for i915simple.
+ *
+ * This is an over-simple batchbuffer mechanism.  We will want to improve
+ * its performance, perhaps based on the cmdstream work.  It would be
+ * pretty much impossible to implement swz on top of this interface.
+ *
+ * Additions/changes will also be needed to implement static/dynamic
+ * indirect state.
+ */
+struct i915_winsys {
+
+   void (*destroy)( struct i915_winsys *sws );
+
+   /**
+    * Get the current batch buffer from the winsys.
+    */
+   struct i915_batchbuffer *(*batch_get)( struct i915_winsys *sws );
+
+   /**
+    * Emit a relocation to a buffer.
+    *
+    * Used not only when the buffer addresses are not pinned, but also to
+    * ensure that referenced buffers will not be destroyed until the current
+    * batch buffer execution is finished.
+    *
+    * The access flags are a combination of the I915_BUFFER_ACCESS_WRITE and
+    * I915_BUFFER_ACCESS_READ macros.
+    */
+   void (*batch_reloc)( struct i915_winsys *sws,
+			struct pipe_buffer *buf,
+			unsigned access_flags,
+			unsigned delta );
+
+   /**
+    * Flush the batch.
+    */
+   void (*batch_flush)( struct i915_winsys *sws,
+                        struct pipe_fence_handle **fence );
+};
+
+#define I915_BUFFER_ACCESS_WRITE   0x1
+#define I915_BUFFER_ACCESS_READ    0x2
+
+#define I915_BUFFER_USAGE_LIT_VERTEX  (PIPE_BUFFER_USAGE_CUSTOM << 0)
+
+
+struct pipe_context *i915_create_context( struct pipe_screen *,
+                                          struct pipe_winsys *,
+                                          struct i915_winsys * );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile
new file mode 100644
index 0000000000..e97146e57c
--- /dev/null
+++ b/src/gallium/drivers/i965simple/Makefile
@@ -0,0 +1,54 @@
+TOP = ../../../..
+include $(TOP)/configs/current + +LIBNAME = i965simple + +C_SOURCES = \ +	brw_blit.c \ +	brw_flush.c \ +	brw_screen.c \ +	brw_surface.c \ +	brw_cc.c \ +	brw_clip.c \ +	brw_clip_line.c \ +	brw_clip_point.c \ +	brw_clip_state.c \ +	brw_clip_tri.c \ +	brw_clip_util.c \ +	brw_context.c \ +	brw_curbe.c \ +	brw_draw.c \ +	brw_draw_upload.c \ +	brw_eu.c \ +	brw_eu_debug.c \ +	brw_eu_emit.c \ +	brw_eu_util.c \ +	brw_gs.c \ +	brw_gs_emit.c \ +	brw_gs_state.c \ +	brw_misc_state.c \ +	brw_sf.c \ +	brw_sf_emit.c \ +	brw_sf_state.c \ +	brw_state.c \ +	brw_state_batch.c \ +	brw_state_cache.c \ +	brw_state_pool.c \ +	brw_state_upload.c \ +	brw_tex_layout.c \ +	brw_urb.c \ +	brw_util.c \ +	brw_vs.c \ +	brw_vs_emit.c \ +	brw_vs_state.c \ +	brw_wm.c \ +	brw_wm_iz.c \ +	brw_wm_decl.c \ +	brw_wm_glsl.c \ +	brw_wm_sampler_state.c \ +	brw_wm_state.c \ +	brw_wm_surface_state.c + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript new file mode 100644 index 0000000000..43fc2a4005 --- /dev/null +++ b/src/gallium/drivers/i965simple/SConscript @@ -0,0 +1,54 @@ +Import('*') + +env = env.Clone() + +i965simple = env.ConvenienceLibrary( +	target = 'i965simple', +	source = [ +		'brw_blit.c', +		'brw_cc.c', +		'brw_clip.c', +		'brw_clip_line.c', +		'brw_clip_point.c', +		'brw_clip_state.c', +		'brw_clip_tri.c', +		'brw_clip_util.c', +		'brw_context.c', +		'brw_curbe.c', +		'brw_draw.c', +		'brw_draw_upload.c', +		'brw_eu.c', +		'brw_eu_debug.c', +		'brw_eu_emit.c', +		'brw_eu_util.c', +		'brw_flush.c', +		'brw_gs.c', +		'brw_gs_emit.c', +		'brw_gs_state.c', +		'brw_misc_state.c', +		'brw_screen.c', +		'brw_sf.c', +		'brw_sf_emit.c', +		'brw_sf_state.c', +		'brw_state.c', +		'brw_state_batch.c', +		'brw_state_cache.c', +		'brw_state_pool.c', +		'brw_state_upload.c', +		'brw_surface.c', +		'brw_tex_layout.c', +		'brw_urb.c', +		'brw_util.c', +		'brw_vs.c', +		'brw_vs_emit.c', +		'brw_vs_state.c', +		'brw_wm.c', +		'brw_wm_decl.c', +		'brw_wm_glsl.c', +		'brw_wm_iz.c', +		'brw_wm_sampler_state.c', +		'brw_wm_state.c', +		'brw_wm_surface_state.c', +	]) + +Export('i965simple') diff --git a/src/gallium/drivers/i965simple/brw_batch.h b/src/gallium/drivers/i965simple/brw_batch.h new file mode 100644 index 0000000000..5f5932a488 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_batch.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_BATCH_H +#define BRW_BATCH_H + +#include "brw_winsys.h" + +#define BATCH_LOCALS + +#define INTEL_BATCH_NO_CLIPRECTS 0x1 +#define INTEL_BATCH_CLIPRECTS    0x2 + +#define BEGIN_BATCH( dwords, relocs ) \ +   brw->winsys->batch_start(brw->winsys, dwords, relocs) + +#define OUT_BATCH( dword ) \ +   brw->winsys->batch_dword(brw->winsys, dword) + +#define OUT_RELOC( buf, flags, delta ) \ +   brw->winsys->batch_reloc(brw->winsys, buf, flags, delta) + +#define ADVANCE_BATCH() \ +   brw->winsys->batch_end( brw->winsys ) + +/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer. + */ +#define FLUSH_BATCH(fence) do {				\ +   brw->winsys->batch_flush(brw->winsys, fence);	\ +   brw->hardware_dirty = ~0;				\ +} while (0) + +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s))) + +#endif diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c new file mode 100644 index 0000000000..4d11f8d2ab --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
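The BEGIN_BATCH/OUT_BATCH/OUT_RELOC/ADVANCE_BATCH macros defined in brw_batch.h above forward straight to the brw_winsys callbacks, so the window system owns the actual command buffer and relocation bookkeeping. A toy sketch of what the dword-emission side of such a winsys could look like is shown below; the struct layout (ptr/used/size) is invented for illustration and is not the real winsys.

   /* Sketch only: a trivial batch_dword()-style writer into a plain
    * memory buffer.  Real winsys code also tracks relocations,
    * aperture usage and flushing; none of that is shown here.
    */
   #include <assert.h>

   struct toy_batch {
      unsigned *ptr;     /* start of the batch buffer */
      unsigned used;     /* dwords written so far */
      unsigned size;     /* capacity in dwords */
   };

   static void
   toy_batch_dword(struct toy_batch *batch, unsigned dword)
   {
      assert(batch->used < batch->size);  /* space reserved by batch_start() */
      batch->ptr[batch->used++] = dword;
   }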
+ * + **************************************************************************/ + + +#include <stdio.h> +#include <errno.h> + +#include "brw_batch.h" +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_reg.h" + +#include "pipe/p_context.h" +#include "pipe/internal/p_winsys_screen.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void brw_fill_blit(struct brw_context *brw, +                   unsigned cpp, +                   short dst_pitch, +                   struct pipe_buffer *dst_buffer, +                   unsigned dst_offset, +                   boolean dst_tiled, +                   short x, short y, +                   short w, short h, +                   unsigned color) +{ +   unsigned BR13, CMD; +   BATCH_LOCALS; + +   dst_pitch *= cpp; + +   switch(cpp) { +   case 1: +   case 2: +   case 3: +      BR13 = (0xF0 << 16) | (1<<24); +      CMD = XY_COLOR_BLT_CMD; +      break; +   case 4: +      BR13 = (0xF0 << 16) | (1<<24) | (1<<25); +      CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; +      break; +   default: +      return; +   } + +   if (dst_tiled) { +      CMD |= XY_DST_TILED; +      dst_pitch /= 4; +   } + +   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); +   OUT_BATCH( CMD ); +   OUT_BATCH( dst_pitch | BR13 ); +   OUT_BATCH( (y << 16) | x ); +   OUT_BATCH( ((y+h) << 16) | (x+w) ); +   OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset ); +   OUT_BATCH( color ); +   ADVANCE_BATCH(); +} + +static unsigned translate_raster_op(unsigned logicop) +{ +   switch(logicop) { +   case PIPE_LOGICOP_CLEAR: return 0x00; +   case PIPE_LOGICOP_AND: return 0x88; +   case PIPE_LOGICOP_AND_REVERSE: return 0x44; +   case PIPE_LOGICOP_COPY: return 0xCC; +   case PIPE_LOGICOP_AND_INVERTED: return 0x22; +   case PIPE_LOGICOP_NOOP: return 0xAA; +   case PIPE_LOGICOP_XOR: return 0x66; +   case PIPE_LOGICOP_OR: return 0xEE; +   case PIPE_LOGICOP_NOR: return 0x11; +   case PIPE_LOGICOP_EQUIV: return 0x99; +   case PIPE_LOGICOP_INVERT: return 0x55; +   case PIPE_LOGICOP_OR_REVERSE: return 0xDD; +   case PIPE_LOGICOP_COPY_INVERTED: return 0x33; +   case PIPE_LOGICOP_OR_INVERTED: return 0xBB; +   case PIPE_LOGICOP_NAND: return 0x77; +   case PIPE_LOGICOP_SET: return 0xFF; +   default: return 0; +   } +} + + +/* Copy BitBlt + */ +void brw_copy_blit(struct brw_context *brw, +                   unsigned do_flip, +                   unsigned cpp, +                   short src_pitch, +                   struct pipe_buffer *src_buffer, +                   unsigned  src_offset, +                   boolean src_tiled, +                   short dst_pitch, +                   struct pipe_buffer *dst_buffer, +                   unsigned  dst_offset, +                   boolean dst_tiled, +                   short src_x, short src_y, +                   short dst_x, short dst_y, +                   short w, short h, +                   unsigned logic_op) +{ +   unsigned CMD, BR13; +   int dst_y2 = dst_y + h; +   int dst_x2 = dst_x + w; +   BATCH_LOCALS; + + +   DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", +       __FUNCTION__, +       src_buffer, src_pitch, src_x, src_y, +       dst_buffer, dst_pitch, dst_x, dst_y, +       w,h,logic_op); + +   assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 ); +   assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 ); + +   src_pitch *= cpp; +   dst_pitch *= cpp; + +   switch(cpp) { +   case 1: +   case 2: +   case 3: +      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); +      CMD = XY_SRC_COPY_BLT_CMD; +      break; +   case 4: 
+      BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | +	  (1<<25); +      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; +      break; +   default: +      return; +   } + +   if (src_tiled) { +      CMD |= XY_SRC_TILED; +      src_pitch /= 4; +   } + +   if (dst_tiled) { +      CMD |= XY_DST_TILED; +      dst_pitch /= 4; +   } + +   if (dst_y2 < dst_y || +       dst_x2 < dst_x) { +      return; +   } + +   dst_pitch &= 0xffff; +   src_pitch &= 0xffff; + +   /* Initial y values don't seem to work with negative pitches.  If +    * we adjust the offsets manually (below), it seems to work fine. +    * +    * On the other hand, if we always adjust, the hardware doesn't +    * know which blit directions to use, so overlapping copypixels get +    * the wrong result. +    */ +   if (dst_pitch > 0 && src_pitch > 0) { +      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); +      OUT_BATCH( CMD ); +      OUT_BATCH( dst_pitch | BR13 ); +      OUT_BATCH( (dst_y << 16) | dst_x ); +      OUT_BATCH( (dst_y2 << 16) | dst_x2 ); +      OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, +		 dst_offset ); +      OUT_BATCH( (src_y << 16) | src_x ); +      OUT_BATCH( src_pitch ); +      OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, +		 src_offset ); +      ADVANCE_BATCH(); +   } +   else { +      BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); +      OUT_BATCH( CMD ); +      OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); +      OUT_BATCH( (0 << 16) | dst_x ); +      OUT_BATCH( (h << 16) | dst_x2 ); +      OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, +		 dst_offset + dst_y * dst_pitch ); +      OUT_BATCH( (src_pitch & 0xffff) ); +      OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, +		 src_offset + src_y * src_pitch ); +      ADVANCE_BATCH(); +   } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h new file mode 100644 index 0000000000..111c5d91d3 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.h @@ -0,0 +1,33 @@ +#ifndef BRW_BLIT_H +#define BRW_BLIT_H + +#include "pipe/p_compiler.h" + +struct pipe_buffer; +struct brw_context; + +void brw_fill_blit(struct brw_context *intel, +                   unsigned cpp, +                   short dst_pitch, +                   struct pipe_buffer *dst_buffer, +                   unsigned dst_offset, +                   boolean dst_tiled, +                   short x, short y, +                   short w, short h, +                   unsigned color); +void brw_copy_blit(struct brw_context *intel, +                   unsigned do_flip, +                   unsigned cpp, +                   short src_pitch, +                   struct pipe_buffer *src_buffer, +                   unsigned  src_offset, +                   boolean src_tiled, +                   short dst_pitch, +                   struct pipe_buffer *dst_buffer, +                   unsigned  dst_offset, +                   boolean dst_tiled, +                   short src_x, short src_y, +                   short dst_x, short dst_y, +                   short w, short h, +                   unsigned logic_op); +#endif diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c new file mode 100644 index 0000000000..3668123e2e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -0,0 +1,269 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
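The 8-bit values returned by translate_raster_op() above follow the classic raster-op encoding: the source operand contributes the bit pattern 0xCC and the destination 0xAA, and each logic op's code is simply that boolean function applied to the two patterns. A few standalone spot checks against the table in the code:

   /* Verifies a handful of translate_raster_op() entries against the
    * 0xCC/0xAA raster-op convention.  Illustration only.
    */
   #include <assert.h>

   static void
   rop_spot_check(void)
   {
      const unsigned src = 0xCC, dst = 0xAA;

      assert((src & dst) == 0x88);            /* PIPE_LOGICOP_AND          */
      assert((src | dst) == 0xEE);            /* PIPE_LOGICOP_OR           */
      assert((src ^ dst) == 0x66);            /* PIPE_LOGICOP_XOR          */
      assert((~(src | dst) & 0xff) == 0x11);  /* PIPE_LOGICOP_NOR          */
      assert((~src & dst) == 0x22);           /* PIPE_LOGICOP_AND_INVERTED */
      assert(src == 0xCC);                    /* PIPE_LOGICOP_COPY         */
   }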
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" + + +static int brw_translate_compare_func(int func) +{ +   switch(func) { +   case PIPE_FUNC_NEVER: +      return BRW_COMPAREFUNCTION_NEVER; +   case PIPE_FUNC_LESS: +      return BRW_COMPAREFUNCTION_LESS; +   case PIPE_FUNC_LEQUAL: +      return BRW_COMPAREFUNCTION_LEQUAL; +   case PIPE_FUNC_GREATER: +      return BRW_COMPAREFUNCTION_GREATER; +   case PIPE_FUNC_GEQUAL: +      return BRW_COMPAREFUNCTION_GEQUAL; +   case PIPE_FUNC_NOTEQUAL: +      return BRW_COMPAREFUNCTION_NOTEQUAL; +   case PIPE_FUNC_EQUAL: +      return BRW_COMPAREFUNCTION_EQUAL; +   case PIPE_FUNC_ALWAYS: +      return BRW_COMPAREFUNCTION_ALWAYS; +   } + +   debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); +   return BRW_COMPAREFUNCTION_ALWAYS; +} + +static int brw_translate_stencil_op(int op) +{ +   switch(op) { +   case PIPE_STENCIL_OP_KEEP: +      return BRW_STENCILOP_KEEP; +   case PIPE_STENCIL_OP_ZERO: +      return BRW_STENCILOP_ZERO; +   case PIPE_STENCIL_OP_REPLACE: +      return BRW_STENCILOP_REPLACE; +   case PIPE_STENCIL_OP_INCR: +      return BRW_STENCILOP_INCRSAT; +   case PIPE_STENCIL_OP_DECR: +      return BRW_STENCILOP_DECRSAT; +   case PIPE_STENCIL_OP_INCR_WRAP: +      return BRW_STENCILOP_INCR; +   case PIPE_STENCIL_OP_DECR_WRAP: +      return BRW_STENCILOP_DECR; +   case PIPE_STENCIL_OP_INVERT: +      return BRW_STENCILOP_INVERT; +   default: +      return BRW_STENCILOP_ZERO; +   } +} + + +static int brw_translate_logic_op(int opcode) +{ +   switch(opcode) { +   case PIPE_LOGICOP_CLEAR: +      return BRW_LOGICOPFUNCTION_CLEAR; +   case PIPE_LOGICOP_AND: +      return BRW_LOGICOPFUNCTION_AND; +   case PIPE_LOGICOP_AND_REVERSE: +      return BRW_LOGICOPFUNCTION_AND_REVERSE; +   case PIPE_LOGICOP_COPY: +      return BRW_LOGICOPFUNCTION_COPY; +   case PIPE_LOGICOP_COPY_INVERTED: +      return BRW_LOGICOPFUNCTION_COPY_INVERTED; +   case PIPE_LOGICOP_AND_INVERTED: +      return BRW_LOGICOPFUNCTION_AND_INVERTED; +   case PIPE_LOGICOP_NOOP: +      return BRW_LOGICOPFUNCTION_NOOP; +   case PIPE_LOGICOP_XOR: +      return BRW_LOGICOPFUNCTION_XOR; +   case PIPE_LOGICOP_OR: +      return BRW_LOGICOPFUNCTION_OR; +  
 case PIPE_LOGICOP_OR_INVERTED: +      return BRW_LOGICOPFUNCTION_OR_INVERTED; +   case PIPE_LOGICOP_NOR: +      return BRW_LOGICOPFUNCTION_NOR; +   case PIPE_LOGICOP_EQUIV: +      return BRW_LOGICOPFUNCTION_EQUIV; +   case PIPE_LOGICOP_INVERT: +      return BRW_LOGICOPFUNCTION_INVERT; +   case PIPE_LOGICOP_OR_REVERSE: +      return BRW_LOGICOPFUNCTION_OR_REVERSE; +   case PIPE_LOGICOP_NAND: +      return BRW_LOGICOPFUNCTION_NAND; +   case PIPE_LOGICOP_SET: +      return BRW_LOGICOPFUNCTION_SET; +   default: +      return BRW_LOGICOPFUNCTION_SET; +   } +} + + +static void upload_cc_vp( struct brw_context *brw ) +{ +   struct brw_cc_viewport ccv; + +   memset(&ccv, 0, sizeof(ccv)); + +   ccv.min_depth = 0.0; +   ccv.max_depth = 1.0; + +   brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); +} + +const struct brw_tracked_state brw_cc_vp = { +   .dirty = { +      .brw = BRW_NEW_SCENE, +      .cache = 0 +   }, +   .update = upload_cc_vp +}; + + +static void upload_cc_unit( struct brw_context *brw ) +{ +   struct brw_cc_unit_state cc; + +   memset(&cc, 0, sizeof(cc)); + +   /* BRW_NEW_DEPTH_STENCIL */ +   if (brw->attribs.DepthStencil->stencil[0].enabled) { +      cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled; +      cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func); +      cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op); +      cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op( +         brw->attribs.DepthStencil->stencil[0].zfail_op); +      cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( +         brw->attribs.DepthStencil->stencil[0].zpass_op); +      cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; +      cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].writemask; +      cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].valuemask; + +      if (brw->attribs.DepthStencil->stencil[1].enabled) { +	 cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; +	 cc.cc0.bf_stencil_func = brw_translate_compare_func( +            brw->attribs.DepthStencil->stencil[1].func); +	 cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op( +            brw->attribs.DepthStencil->stencil[1].fail_op); +	 cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op( +            brw->attribs.DepthStencil->stencil[1].zfail_op); +	 cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( +            brw->attribs.DepthStencil->stencil[1].zpass_op); +	 cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; +	 cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].writemask; +	 cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].valuemask; +      } + +      /* Not really sure about this: +       */ +      if (brw->attribs.DepthStencil->stencil[0].writemask || +	  brw->attribs.DepthStencil->stencil[1].writemask) +	 cc.cc0.stencil_write_enable = 1; +   } + +   /* BRW_NEW_BLEND */ +   if (brw->attribs.Blend->logicop_enable) { +      cc.cc2.logicop_enable = 1; +      cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func ); +   } +   else if (brw->attribs.Blend->blend_enable) { +      int eqRGB = brw->attribs.Blend->rgb_func; +      int eqA = brw->attribs.Blend->alpha_func; +      int srcRGB = brw->attribs.Blend->rgb_src_factor; +      int dstRGB = brw->attribs.Blend->rgb_dst_factor; +      int srcA = 
brw->attribs.Blend->alpha_src_factor; +      int dstA = brw->attribs.Blend->alpha_dst_factor; + +      if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) { +	 srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE; +      } + +      if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) { +	 srcA = dstA = PIPE_BLENDFACTOR_ONE; +      } + +      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); +      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); +      cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + +      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); +      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); +      cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + +      cc.cc3.blend_enable = 1; +      cc.cc3.ia_blend_enable = (srcA != srcRGB || +				dstA != dstRGB || +				eqA != eqRGB); +   } +    +   /* BRW_NEW_ALPHATEST +    */ +   if (brw->attribs.DepthStencil->alpha.enabled) { +      cc.cc3.alpha_test = 1; +      cc.cc3.alpha_test_func =  +	 brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); + +      cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref_value); + +      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; +   } + +   if (brw->attribs.Blend->dither) { +      cc.cc5.dither_enable = 1; +      cc.cc6.y_dither_offset = 0; +      cc.cc6.x_dither_offset = 0; +   } + +   if (brw->attribs.DepthStencil->depth.enabled) { +      cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled; +      cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func); +      cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask; +   } + +   /* CACHE_NEW_CC_VP */ +   cc.cc4.cc_viewport_state_offset =  brw->cc.vp_gs_offset >> 5; + +   if (BRW_DEBUG & DEBUG_STATS) +      cc.cc5.statistics_enable = 1; + +   brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); +} + +const struct brw_tracked_state brw_cc_unit = { +   .dirty = { +      .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST, +      .cache = CACHE_NEW_CC_VP +   }, +   .update = upload_cc_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c new file mode 100644 index 0000000000..268124cc53 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.c @@ -0,0 +1,206 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
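brw_cc_vp and brw_cc_unit above follow the i965simple tracked-state pattern: each atom declares the dirty bits it depends on and an update callback that rebuilds its piece of hardware state and re-uploads it through the state cache. The sketch below shows the kind of loop that consumes such atoms; the brw->state.dirty fields and the atoms array are assumed shapes inferred from the .dirty/.update usage visible here, not quoted from this patch.

   /* Sketch only (assumed shape of the state-upload loop): re-run an
    * atom's update() whenever one of its declared dirty bits is set.
    */
   static void
   upload_dirty_atoms(struct brw_context *brw,
                      const struct brw_tracked_state **atoms,
                      int count)
   {
      int i;
      for (i = 0; i < count; i++) {
         const struct brw_tracked_state *atom = atoms[i];
         if ((brw->state.dirty.brw & atom->dirty.brw) ||
             (brw->state.dirty.cache & atom->dirty.cache))
            atom->update(brw);   /* e.g. upload_cc_unit() */
      }
   }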
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_clip.h" + +#define FRONT_UNFILLED_BIT  0x1 +#define BACK_UNFILLED_BIT   0x2 + + +static void compile_clip_prog( struct brw_context *brw, +			     struct brw_clip_prog_key *key ) +{ +   struct brw_clip_compile c; +   const unsigned *program; +   unsigned program_size; +   unsigned delta; +   unsigned i; + +   memset(&c, 0, sizeof(c)); + +   /* Begin the compilation: +    */ +   brw_init_compile(&c.func); + +   c.func.single_program_flow = 1; + +   c.key = *key; + + +   /* Need to locate the two positions present in vertex + header. +    * These are currently hardcoded: +    */ +   c.header_position_offset = ATTR_SIZE; + +   for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++) +      if (c.key.attrs & (1<<i)) { +	 c.offset[i] = delta; +	 delta += ATTR_SIZE; +      } + +   c.nr_attrs = brw_count_bits(c.key.attrs); +   c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */ +   c.nr_bytes = c.nr_regs * REG_SIZE; + +   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + +   /* For some reason the thread is spawned with only 4 channels +    * unmasked. +    */ +   brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + +   /* Would ideally have the option of producing a program which could +    * do all three: +    */ +   switch (key->primitive) { +   case PIPE_PRIM_TRIANGLES: +#if 0 +      if (key->do_unfilled) +	 brw_emit_unfilled_clip( &c ); +      else +#endif +	 brw_emit_tri_clip( &c ); +      break; +   case PIPE_PRIM_LINES: +      brw_emit_line_clip( &c ); +      break; +   case PIPE_PRIM_POINTS: +      brw_emit_point_clip( &c ); +      break; +   default: +      assert(0); +      return; +   } + + + +   /* get the program +    */ +   program = brw_get_program(&c.func, &program_size); + +   /* Upload +    */ +   brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], +						&c.key, +						sizeof(c.key), +						program, +						program_size, +						&c.prog_data, +						&brw->clip.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, +			       struct brw_clip_prog_key *key ) +{ +   return brw_search_cache(&brw->cache[BRW_CLIP_PROG], +			   key, sizeof(*key), +			   &brw->clip.prog_data, +			   &brw->clip.prog_gs_offset); +} + + + + +/* Calculate interpolants for triangle and line rasterization. 
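/*
 * Aside -- illustrative sketch only, not part of this diff.  It restates the
 * vertex layout arithmetic in compile_clip_prog() above: a clip vertex is one
 * 32-byte header register followed by a 16-byte slot (ATTR_SIZE) per
 * attribute enabled in the key's attrs bitmask, so two attributes share a GRF
 * and nr_regs = (nr_attrs + 1) / 2 + 1.  The function name and the fixed
 * 32-bit mask width are assumptions of this sketch.
 */
static unsigned layout_clip_vertex(unsigned attrs_mask,
                                   unsigned offset[32],
                                   unsigned *nr_regs)
{
   unsigned delta = 32;            /* REG_SIZE: skip the vertex header */
   unsigned nr_attrs = 0;
   unsigned i;

   for (i = 0; i < 32; i++) {
      if (attrs_mask & (1u << i)) {
         offset[i] = delta;        /* byte offset of attribute i */
         delta += 16;              /* ATTR_SIZE: one vec4 per attribute */
         nr_attrs++;
      }
   }
   *nr_regs = (nr_attrs + 1) / 2 + 1;   /* packed attribute pairs + header */
   return nr_attrs;
}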
+ */ +static void upload_clip_prog(struct brw_context *brw) +{ +   struct brw_clip_prog_key key; + +   memset(&key, 0, sizeof(key)); + +   /* Populate the key: +    */ +   /* BRW_NEW_REDUCED_PRIMITIVE */ +   key.primitive = brw->reduced_primitive; +   /* CACHE_NEW_VS_PROG */ +   key.attrs = brw->vs.prog_data->outputs_written; +   /* BRW_NEW_RASTER */ +   key.do_flat_shading = (brw->attribs.Raster->flatshade); +   /* BRW_NEW_CLIP */ +   key.nr_userclip = brw->attribs.Clip.nr; /* XXX */ + +#if 0 +   key.clip_mode = BRW_CLIPMODE_NORMAL; + +   if (key.primitive == PIPE_PRIM_TRIANGLES) { +      if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH) +	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL; +      else { +         if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || +             brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL) +            key.do_unfilled = 1; + +	 /* Most cases the fixed function units will handle.  Cases where +	  * one or more polygon faces are unfilled will require help: +	  */ +	 if (key.do_unfilled) { +	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + +	    if (brw->attribs.Raster->offset_cw || +                brw->attribs.Raster->offset_ccw) { +	       key.offset_units = brw->attribs.Raster->offset_units; +	       key.offset_factor = brw->attribs.Raster->offset_scale; +	    } +            key.fill_ccw = brw->attribs.Raster->fill_ccw; +            key.fill_cw = brw->attribs.Raster->fill_cw; +            key.offset_ccw = brw->attribs.Raster->offset_ccw; +            key.offset_cw = brw->attribs.Raster->offset_cw; +            if (brw->attribs.Raster->light_twoside && +                key.fill_cw != CLIP_CULL) +               key.copy_bfc_cw = 1; +	 } +      } +   } +#else +   key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL; +#endif + +   if (!search_cache(brw, &key)) +      compile_clip_prog( brw, &key ); +} + +const struct brw_tracked_state brw_clip_prog = { +   .dirty = { +      .brw   = (BRW_NEW_RASTERIZER | +		BRW_NEW_CLIP | +		BRW_NEW_REDUCED_PRIMITIVE), +      .cache = CACHE_NEW_VS_PROG +   }, +   .update = upload_clip_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h new file mode 100644 index 0000000000..d70fc094ff --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.h @@ -0,0 +1,170 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6)	 + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { +   unsigned attrs:32;		 +   unsigned primitive:4; +   unsigned nr_userclip:3; +   unsigned do_flat_shading:1; +   unsigned do_unfilled:1; +   unsigned fill_cw:2;		/* includes cull information */ +   unsigned fill_ccw:2;		/* includes cull information */ +   unsigned offset_cw:1; +   unsigned offset_ccw:1; +   unsigned pad0:17; + +   unsigned copy_bfc_cw:1; +   unsigned copy_bfc_ccw:1; +   unsigned clip_mode:3; +   unsigned pad1:27; +    +   float offset_factor; +   float offset_units; +}; + + +#define CLIP_LINE   0 +#define CLIP_POINT  1 +#define CLIP_FILL   2 +#define CLIP_CULL   3 + + +#define PRIM_MASK  (0x1f) + +struct brw_clip_compile { +   struct brw_compile func; +   struct brw_clip_prog_key key; +   struct brw_clip_prog_data prog_data; +    +   struct { +      struct brw_reg R0; +      struct brw_reg vertex[MAX_VERTS]; + +      struct brw_reg t; +      struct brw_reg t0, t1; +      struct brw_reg dp0, dp1; + +      struct brw_reg dpPrev; +      struct brw_reg dp; +      struct brw_reg loopcount; +      struct brw_reg nr_verts; +      struct brw_reg planemask; + +      struct brw_reg inlist; +      struct brw_reg outlist; +      struct brw_reg freelist; + +      struct brw_reg dir; +      struct brw_reg tmp0, tmp1; +      struct brw_reg offset; +       +      struct brw_reg fixed_planes; +      struct brw_reg plane_equation; +   } reg; + +   /* 3 different ways of expressing vertex size: +    */ +   unsigned nr_attrs; +   unsigned nr_regs; +   unsigned nr_bytes; + +   unsigned first_tmp; +   unsigned last_tmp; + +   boolean need_direction; + +   unsigned last_mrf; + +   unsigned header_position_offset; +   unsigned offset[PIPE_MAX_ATTRIBS]; +}; + +#define ATTR_SIZE  (4*4) + +/* Points are only culled, so no need for a clip routine, however it + * works out easier to have a dummy one. 
+ */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,  +			      unsigned nr_verts ); + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, +			     struct brw_indirect dest_ptr, +			     struct brw_indirect v0_ptr, /* from */ +			     struct brw_indirect v1_ptr, /* to */ +			     struct brw_reg t0, +			     boolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c,  +		       struct brw_indirect vert, +		       boolean allocate, +		       boolean eot, +		       unsigned header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, +			   unsigned to, unsigned from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c new file mode 100644 index 0000000000..75d9e5fcda --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_line.c @@ -0,0 +1,245 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
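/*
 * Aside -- illustrative sketch only, not part of this diff.
 * clip_and_emit_line() below follows the parametric scheme laid out in the
 * comment above it (part of the emitted sequence is still stubbed out with
 * #warning "disabled").  The same loop in plain C for reference: accumulate
 * t0 (clipping from the v0 end) and t1 (clipping from the v1 end) over all
 * active planes, and reject the line once t0 + t1 >= 1.  The float[4]
 * layout and helper names are assumptions of this sketch.
 */
static float dot4(const float v[4], const float p[4])
{
   return v[0]*p[0] + v[1]*p[1] + v[2]*p[2] + v[3]*p[3];
}

static int clip_line_ref(const float v0[4], const float v1[4],
                         const float planes[][4], unsigned planemask,
                         float *t0, float *t1)
{
   unsigned p;

   *t0 = *t1 = 0.0f;
   for (p = 0; planemask; p++, planemask >>= 1) {
      if (planemask & 1) {
         float dp0 = dot4(v0, planes[p]);
         float dp1 = dot4(v1, planes[p]);

         if (dp1 < 0.0f) {
            float t = dp1 / (dp1 - dp0);
            if (t > *t1) *t1 = t;          /* move the v1 endpoint in */
         }
         else {
            float t = dp0 / (dp0 - dp1);
            if (t > *t0) *t0 = t;          /* move the v0 endpoint in */
         }

         if (*t0 + *t1 >= 1.0f)
            return 0;                      /* whole line clipped away */
      }
   }
   return 1;   /* caller interpolates the two new endpoints with t0, t1 */
}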
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) +{ +   unsigned i = 0,j; + +   /* Register usage is static, precompute here: +    */ +   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + +   if (c->key.nr_userclip) { +      c->reg.fixed_planes = brw_vec4_grf(i, 0); +      i += (6 + c->key.nr_userclip + 1) / 2; + +      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; +   } +   else +      c->prog_data.curb_read_length = 0; + + +   /* Payload vertices plus space for more generated vertices: +    */ +   for (j = 0; j < 4; j++) { +      c->reg.vertex[j] = brw_vec4_grf(i, 0); +      i += c->nr_regs; +   } + +   c->reg.t           = brw_vec1_grf(i, 0); +   c->reg.t0          = brw_vec1_grf(i, 1); +   c->reg.t1          = brw_vec1_grf(i, 2); +   c->reg.planemask   = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); +   c->reg.plane_equation = brw_vec4_grf(i, 4); +   i++; + +   c->reg.dp0         = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ +   c->reg.dp1         = brw_vec1_grf(i, 4); +   i++; + +   if (!c->key.nr_userclip) { +      c->reg.fixed_planes = brw_vec8_grf(i, 0); +      i++; +   } + + +   c->first_tmp = i; +   c->last_tmp = i; + +   c->prog_data.urb_read_length = c->nr_regs; /* ? */ +   c->prog_data.total_grf = i; +} + + + +/* Line clipping, more or less following the following algorithm: + * + *  for (p=0;p<MAX_PLANES;p++) { + *     if (clipmask & (1 << p)) { + *        float dp0 = DOTPROD( vtx0, plane[p] ); + *        float dp1 = DOTPROD( vtx1, plane[p] ); + * + *        if (IS_NEGATIVE(dp1)) { + *           float t = dp1 / (dp1 - dp0); + *           if (t > t1) t1 = t; + *        } else { + *           float t = dp0 / (dp0 - dp1); + *           if (t > t0) t0 = t; + *        } + * + *        if (t0 + t1 >= 1.0) + *           return; + *     } + *  } + * + *  interp( ctx, newvtx0, vtx0, vtx1, t0 ); + *  interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_indirect vtx0     = brw_indirect(0, 0); +   struct brw_indirect vtx1      = brw_indirect(1, 0); +   struct brw_indirect newvtx0   = brw_indirect(2, 0); +   struct brw_indirect newvtx1   = brw_indirect(3, 0); +   struct brw_indirect plane_ptr = brw_indirect(4, 0); +   struct brw_instruction *plane_loop; +   struct brw_instruction *plane_active; +   struct brw_instruction *is_negative; +   struct brw_instruction *is_neg2; +   struct brw_instruction *not_culled; +   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + +   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0])); +   brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1])); +   brw_MOV(p, get_addr_reg(newvtx0),   brw_address(c->reg.vertex[2])); +   brw_MOV(p, get_addr_reg(newvtx1),   brw_address(c->reg.vertex[3])); +   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + +   /* Note: init t0, t1 together: +    */ +   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + +   brw_clip_init_planes(c); +   brw_clip_init_clipmask(c); + +   /* -ve rhw workaround */ +   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +   brw_AND(p, brw_null_reg(), 
get_element_ud(c->reg.R0, 2), +	   brw_imm_ud(1<<20)); +   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); +   brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +   plane_loop = brw_DO(p, BRW_EXECUTE_1); +   { +      /* if (planemask & 1) +       */ +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); + +      plane_active = brw_IF(p, BRW_EXECUTE_1); +      { +	 if (c->key.nr_userclip) +	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); +	 else +	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + +#if 0 +	 /* dp = DP4(vtx->position, plane) +	  */ +	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + +	 /* if (IS_NEGATIVE(dp1)) +	  */ +	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L); +	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +#else +         #warning "disabled" +#endif +	 is_negative = brw_IF(p, BRW_EXECUTE_1); +	 { +	    brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); +	    brw_math_invert(p, c->reg.t, c->reg.t); +	    brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + +	    brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); +	    brw_MOV(p, c->reg.t1, c->reg.t); +	    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +	 } +	 is_negative = brw_ELSE(p, is_negative); +	 { +	    /* Coming back in.  We know that both cannot be negative +	     * because the line would have been culled in that case. +	     */ + +	    /* If both are positive, do nothing */ +             brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); +             is_neg2 = brw_IF(p, BRW_EXECUTE_1); +             { +		brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); +		brw_math_invert(p, c->reg.t, c->reg.t); +		brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + +		brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); +		brw_MOV(p, c->reg.t0, c->reg.t); +		brw_set_predicate_control(p, BRW_PREDICATE_NONE); +	     } +	     brw_ENDIF(p, is_neg2); +	 } +	 brw_ENDIF(p, is_negative); +      } +      brw_ENDIF(p, plane_active); + +      /* plane_ptr++; +       */ +      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + +      /* while (planemask>>=1) != 0 +       */ +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); +   } +   brw_WHILE(p, plane_loop); + +   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); +   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); +   not_culled = brw_IF(p, BRW_EXECUTE_1); +   { +      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); +      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); + +      brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); +      brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); +   } +   brw_ENDIF(p, not_culled); +   brw_clip_kill_thread(c); +} + + + +void brw_emit_line_clip( struct brw_clip_compile *c ) +{ +   brw_clip_line_alloc_regs(c); + +   if (c->key.do_flat_shading) +      brw_clip_copy_colors(c, 0, 1); + +   clip_and_emit_line(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c new file mode 100644 index 0000000000..6fce7210d1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_point.c @@ 
-0,0 +1,47 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping, nothing to do? + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ +   /* Send an empty message to kill the thread: +    */ +   brw_clip_tri_alloc_regs(c, 0); +   brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c new file mode 100644 index 0000000000..8e78dd51be --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -0,0 +1,93 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
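/*
 * Aside -- illustrative sketch only, not part of this diff.
 * upload_clip_unit() below packs byte quantities into the coarser units the
 * hardware expects: the kernel start pointer is stored in 64-byte units
 * (hence the >> 6) and the GRF count in blocks of 16 registers, minus one.
 * Plain-C restatement; align() here stands for the round-up helper from
 * util/u_math.h.
 */
static unsigned pack_grf_reg_count(unsigned total_grf)
{
   return (total_grf + 15) / 16 - 1;   /* align(total_grf, 16) / 16 - 1 */
}

static unsigned pack_kernel_start_pointer(unsigned prog_gs_offset)
{
   return prog_gs_offset >> 6;         /* program offset in 64-byte units */
}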
+  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +static void upload_clip_unit( struct brw_context *brw ) +{ +   struct brw_clip_unit_state clip; + +   memset(&clip, 0, sizeof(clip)); + +   /* CACHE_NEW_CLIP_PROG */ +   clip.thread0.grf_reg_count = +      align(brw->clip.prog_data->total_grf, 16) / 16 - 1; +   clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; +   clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; +   clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; +   clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + +   /* BRW_NEW_CURBE_OFFSETS */ +   clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + +   /* BRW_NEW_URB_FENCE */ +   clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries;  +   clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; +   clip.thread4.max_threads = 1; /* 2 threads */ + +   if (BRW_DEBUG & DEBUG_STATS) +      clip.thread4.stats_enable = 1;  + +   /* CONSTANT */ +   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; +   clip.thread1.single_program_flow = 1; +   clip.thread3.dispatch_grf_start_reg = 1; +   clip.thread3.urb_entry_read_offset = 0; +   clip.clip5.userclip_enable_flags = 0x7f; +   clip.clip5.userclip_must_clip = 1; +   clip.clip5.guard_band_enable = 0; +   clip.clip5.viewport_z_clip_enable = 1; +   clip.clip5.viewport_xy_clip_enable = 1; +   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; +   clip.clip5.api_mode = BRW_CLIP_API_OGL;    +   clip.clip6.clipper_viewport_state_ptr = 0; +   clip.viewport_xmin = -1; +   clip.viewport_xmax = 1; +   clip.viewport_ymin = -1; +   clip.viewport_ymax = 1; + +   brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); +} + + +const struct brw_tracked_state brw_clip_unit = { +   .dirty = { +      .brw   = (BRW_NEW_CURBE_OFFSETS | +		BRW_NEW_URB_FENCE), +      .cache = CACHE_NEW_CLIP_PROG +   }, +   .update = upload_clip_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c new file mode 100644 index 0000000000..c5da7b825e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_tri.c @@ -0,0 +1,566 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
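/*
 * Aside -- illustrative sketch only, not part of this diff.  The body of
 * brw_clip_tri() below (currently compiled out under #if 0) is a GEN4
 * rendering of the Sutherland-Hodgman style loop used by mesa: clip the
 * vertex list against one plane at a time, carrying the previous vertex's
 * plane distance and emitting an interpolated vertex on every sign change.
 * Plain-C reference for a single plane; the vertex type, dot4() and lerp4()
 * helpers are assumptions of this sketch.
 */
struct clip_vtx { float pos[4]; };

static float dot4(const float v[4], const float p[4])
{
   return v[0]*p[0] + v[1]*p[1] + v[2]*p[2] + v[3]*p[3];
}

static void lerp4(float out[4], const float a[4], const float b[4], float t)
{
   int i;
   for (i = 0; i < 4; i++)
      out[i] = a[i] + t * (b[i] - a[i]);
}

static unsigned clip_poly_against_plane(struct clip_vtx *out,
                                        const struct clip_vtx *in,
                                        unsigned nr, const float plane[4])
{
   const struct clip_vtx *prev = &in[nr - 1];
   float dp_prev = dot4(prev->pos, plane);
   unsigned i, nr_out = 0;

   for (i = 0; i < nr; i++) {
      float dp = dot4(in[i].pos, plane);

      if (dp_prev >= 0.0f)                      /* prev is inside: keep it */
         out[nr_out++] = *prev;

      if ((dp >= 0.0f) != (dp_prev >= 0.0f)) {  /* edge crosses the plane */
         float t = dp_prev / (dp_prev - dp);
         lerp4(out[nr_out++].pos, prev->pos, in[i].pos, t);
      }

      prev = &in[i];
      dp_prev = dp;
   }
   return nr_out;                               /* fewer than 3: culled */
}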
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ +   struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + +   if (++c->last_tmp > c->prog_data.total_grf) +      c->prog_data.total_grf = c->last_tmp; + +   return tmp; +} + +static void release_tmps( struct brw_clip_compile *c ) +{ +   c->last_tmp = c->first_tmp; +} + + +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, +			      unsigned nr_verts ) +{ +   unsigned i = 0,j; + +   /* Register usage is static, precompute here: +    */ +   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + +   if (c->key.nr_userclip) { +      c->reg.fixed_planes = brw_vec4_grf(i, 0); +      i += (6 + c->key.nr_userclip + 1) / 2; + +      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; +   } +   else +      c->prog_data.curb_read_length = 0; + + +   /* Payload vertices plus space for more generated vertices: +    */ +   for (j = 0; j < nr_verts; j++) { +      c->reg.vertex[j] = brw_vec4_grf(i, 0); +      i += c->nr_regs; +   } + +   if (c->nr_attrs & 1) { +      for (j = 0; j < 3; j++) { +	 unsigned delta = c->nr_attrs*16 + 32; +	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); +      } +   } + +   c->reg.t          = brw_vec1_grf(i, 0); +   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); +   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); +   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); +   c->reg.plane_equation = brw_vec4_grf(i, 4); +   i++; + +   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ +   c->reg.dp         = brw_vec1_grf(i, 4); +   i++; + +   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); +   i++; + +   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); +   i++; + +   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); +   i++; + +   if (!c->key.nr_userclip) { +      c->reg.fixed_planes = brw_vec8_grf(i, 0); +      i++; +   } + +   if (c->key.do_unfilled) { +      c->reg.dir     = brw_vec4_grf(i, 0); +      c->reg.offset  = brw_vec4_grf(i, 4); +      i++; +      c->reg.tmp0    = brw_vec4_grf(i, 0); +      c->reg.tmp1    = brw_vec4_grf(i, 4); +      i++; +   } + +   c->first_tmp = i; +   c->last_tmp = i; + +   c->prog_data.urb_read_length = c->nr_regs; /* ? */ +   c->prog_data.total_grf = i; +} + + + +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ +   struct brw_instruction *is_rev; + +   /* Initial list of indices for incoming vertexes: +    */ +   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); +   brw_CMP(p, +	   vec1(brw_null_reg()), +	   BRW_CONDITIONAL_EQ, +	   tmp0, +	   brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + +   /* XXX: Is there an easier way to do this?  
Need to reverse every +    * second tristrip element:  Can ignore sometimes? +    */ +   is_rev = brw_IF(p, BRW_EXECUTE_1); +   { +      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[1]) ); +      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[0]) ); +      if (c->need_direction) +	 brw_MOV(p, c->reg.dir, brw_imm_f(-1)); +   } +   is_rev = brw_ELSE(p, is_rev); +   { +      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[0]) ); +      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[1]) ); +      if (c->need_direction) +	 brw_MOV(p, c->reg.dir, brw_imm_f(1)); +   } +   brw_ENDIF(p, is_rev); + +   brw_MOV(p, get_element(c->reg.inlist, 2),  brw_address(c->reg.vertex[2]) ); +   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); +   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *is_poly; +   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + +   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); +   brw_CMP(p, +	   vec1(brw_null_reg()), +	   BRW_CONDITIONAL_EQ, +	   tmp0, +	   brw_imm_ud(_3DPRIM_POLYGON)); + +   is_poly = brw_IF(p, BRW_EXECUTE_1); +   { +      brw_clip_copy_colors(c, 1, 0); +      brw_clip_copy_colors(c, 2, 0); +   } +   is_poly = brw_ELSE(p, is_poly); +   { +      brw_clip_copy_colors(c, 0, 2); +      brw_clip_copy_colors(c, 1, 2); +   } +   brw_ENDIF(p, is_poly); +} + + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly. + */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ +#if 0 +   struct brw_compile *p = &c->func; +   struct brw_indirect vtx = brw_indirect(0, 0); +   struct brw_indirect vtxPrev = brw_indirect(1, 0); +   struct brw_indirect vtxOut = brw_indirect(2, 0); +   struct brw_indirect plane_ptr = brw_indirect(3, 0); +   struct brw_indirect inlist_ptr = brw_indirect(4, 0); +   struct brw_indirect outlist_ptr = brw_indirect(5, 0); +   struct brw_indirect freelist_ptr = brw_indirect(6, 0); +   struct brw_instruction *plane_loop; +   struct brw_instruction *plane_active; +   struct brw_instruction *vertex_loop; +   struct brw_instruction *next_test; +   struct brw_instruction *prev_test; + +   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) ); +   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c)); +   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist)); +   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + +   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); + +   plane_loop = brw_DO(p, BRW_EXECUTE_1); +   { +      /* if (planemask & 1) +       */ +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); + +      plane_active = brw_IF(p, BRW_EXECUTE_1); +      { +	 /* vtxOut = freelist_ptr++ +	  */ +	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) ); +	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + +	 if (c->key.nr_userclip) +	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); +	 else +	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + +	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); +	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + +	 vertex_loop = brw_DO(p, BRW_EXECUTE_1); +	 { +	    /* vtx = *input_ptr; +	     */ +	    
brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + +	    /* IS_NEGATIVE(prev) */ +	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L); +	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +	    prev_test = brw_IF(p, BRW_EXECUTE_1); +	    { +	       /* IS_POSITIVE(next) +		*/ +	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); +	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +	       next_test = brw_IF(p, BRW_EXECUTE_1); +	       { + +		  /* Coming back in. +		   */ +		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); +		  brw_math_invert(p, c->reg.t, c->reg.t); +		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); + +		  /* If (vtxOut == 0) vtxOut = vtxPrev +		   */ +		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); +		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); +		  brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE); + +		  /* *outlist_ptr++ = vtxOut; +		   * nr_verts++; +		   * vtxOut = 0; +		   */ +		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); +		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); +		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); +		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); +	       } +	       brw_ENDIF(p, next_test); + +	    } +	    prev_test = brw_ELSE(p, prev_test); +	    { +	       /* *outlist_ptr++ = vtxPrev; +		* nr_verts++; +		*/ +	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); +	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); +	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + +	       /* IS_NEGATIVE(next) +		*/ +	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L); +	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +	       next_test = brw_IF(p, BRW_EXECUTE_1); +	       { +		  /* Going out of bounds.  Avoid division by zero as we +		   * know dp != dpPrev from DIFFERENT_SIGNS, above. 
+		   */ +		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); +		  brw_math_invert(p, c->reg.t, c->reg.t); +		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); + +		  /* If (vtxOut == 0) vtxOut = vtx +		   */ +		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); +		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); +		  brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE); + +		  /* *outlist_ptr++ = vtxOut; +		   * nr_verts++; +		   * vtxOut = 0; +		   */ +		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); +		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); +		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); +		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); +	       } +	       brw_ENDIF(p, next_test); +	    } +	    brw_ENDIF(p, prev_test); + +	    /* vtxPrev = vtx; +	     * inlist_ptr++; +	     */ +	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); +	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + +	    /* while (--loopcount != 0) +	     */ +	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); +	 } +	 brw_WHILE(p, vertex_loop); + +	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1] +	  * inlist = outlist +	  * inlist_ptr = &inlist[0] +	  * outlist_ptr = &outlist[0] +	  */ +	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); +	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); +	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); +	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); +	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); +      } +      brw_ENDIF(p, plane_active); + +      /* plane_ptr++; +       */ +      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + +      /* nr_verts >= 3 +       */ +      brw_CMP(p, +	      vec1(brw_null_reg()), +	      BRW_CONDITIONAL_GE, +	      c->reg.nr_verts, +	      brw_imm_ud(3)); + +      /* && (planemask>>=1) != 0 +       */ +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); +   } +   brw_WHILE(p, plane_loop); +#else +         #warning "disabled" +#endif +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *loop, *if_insn; + +   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) +    */ +   brw_set_conditionalmod(p, BRW_CONDITIONAL_G); +   brw_ADD(p, +	   c->reg.loopcount, +	   c->reg.nr_verts, +	   brw_imm_d(-2)); + +   if_insn = brw_IF(p, BRW_EXECUTE_1); +   { +      struct brw_indirect v0 = brw_indirect(0, 0); +      struct brw_indirect vptr = brw_indirect(1, 0); + +      brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); +      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + +      brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); + +      brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); +      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + +      loop = brw_DO(p, BRW_EXECUTE_1); +      { +	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); + +	 brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); +	 brw_MOV(p, get_addr_reg(v0), 
deref_1uw(vptr, 0)); + +	 brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); +      } +      brw_WHILE(p, loop); + +      brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); +   } +   brw_ENDIF(p, if_insn); +} + +static void do_clip_tri( struct brw_clip_compile *c ) +{ +   brw_clip_init_planes(c); + +   brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *do_clip; + +   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); +   do_clip = brw_IF(p, BRW_EXECUTE_1); +   { +      do_clip_tri(c); +   } +   brw_ENDIF(p, do_clip); +} + +static void brw_clip_test( struct brw_clip_compile *c ) +{ +#if 0 +    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); +    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); +    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); +    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + +    struct brw_reg v0 = get_tmp(c); +    struct brw_reg v1 = get_tmp(c); +    struct brw_reg v2 = get_tmp(c); + +    struct brw_indirect vt0 = brw_indirect(0, 0); +    struct brw_indirect vt1 = brw_indirect(1, 0); +    struct brw_indirect vt2 = brw_indirect(2, 0); + +    struct brw_compile *p = &c->func; + +    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); +    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); +    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); +    brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); +    brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); +    brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + +    /* test nearz, xmin, ymin plane */ +    brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_XOR(p, t, t1, t2); +    brw_XOR(p, t1, t2, t3); +    brw_OR(p, t, t, t1); + +    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, +	    get_element(t, 0), brw_imm_ud(0)); +    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, +	    get_element(t, 1), brw_imm_ud(0)); +    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, +	    get_element(t, 2), brw_imm_ud(0)); +    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +    /* test farz, xmax, ymax plane */ +    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +    brw_XOR(p, t, t1, t2); +    brw_XOR(p, t1, t2, t3); +    brw_OR(p, t, t, t1); + +    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, +	    get_element(t, 0), brw_imm_ud(0)); +    brw_OR(p, 
c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, +	    get_element(t, 1), brw_imm_ud(0)); +    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); +    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, +	    get_element(t, 2), brw_imm_ud(0)); +    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); +    brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +    release_tmps(c); +#else +         #warning "disabled" +#endif +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ +   struct brw_instruction *neg_rhw; +   struct brw_compile *p = &c->func; +   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); +   brw_clip_tri_init_vertices(c); +   brw_clip_init_clipmask(c); + +   /* if -ve rhw workaround bit is set, +      do cliptest */ +   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +   brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), +	   brw_imm_ud(1<<20)); +   neg_rhw = brw_IF(p, BRW_EXECUTE_1); +   { +       brw_clip_test(c); +   } +   brw_ENDIF(p, neg_rhw); + +   /* Can't push into do_clip_tri because with polygon (or quad) +    * flatshading, need to apply the flatshade here because we don't +    * respect the PV when converting to trifan for emit: +    */ +   if (c->key.do_flat_shading) +      brw_clip_tri_flat_shade(c); + +   if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) +      do_clip_tri(c); +   else +      maybe_do_clip_tri(c); + +   brw_clip_tri_emit_polygon(c); + +   /* Send an empty message to kill the thread: +    */ +   brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c new file mode 100644 index 0000000000..b774a76dd6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_unfilled.c @@ -0,0 +1,477 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
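/*
 * Aside -- illustrative sketch only, not part of this diff.
 * compute_tri_direction() and cull_direction() below decide facing from the
 * sign of the z component of (v0 - v2) x (v1 - v2), flipped by the +/-1
 * direction loaded for reverse tristrips in brw_clip_tri_init_vertices().
 * Plain-C equivalent of that test; the function name is hypothetical.
 */
static float tri_facing_z(const float v0[4], const float v1[4],
                          const float v2[4], float dir /* +1 or -1 */)
{
   float ex = v0[0] - v2[0], ey = v0[1] - v2[1];
   float fx = v1[0] - v2[0], fy = v1[1] - v2[1];

   /* z of the cross product e x f: one sign per winding.  cull_direction()
    * kills the thread when this sign matches the culled winding. */
   return dir * (ex * fy - ey * fx);
}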
+  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg e = c->reg.tmp0; +   struct brw_reg f = c->reg.tmp1; +   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]);  +   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]);  +   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]);  + + +   /* Calculate the vectors of two edges of the triangle: +    */ +   brw_ADD(p, e, v0, negate(v2));  +   brw_ADD(p, f, v1, negate(v2));  + +   /* Take their crossproduct: +    */ +   brw_set_access_mode(p, BRW_ALIGN_16); +   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3)); +   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); +   brw_set_access_mode(p, BRW_ALIGN_1); + +   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *ccw; +   unsigned conditional; + +   assert (!(c->key.fill_ccw == CLIP_CULL && +	     c->key.fill_cw == CLIP_CULL)); + +   if (c->key.fill_ccw == CLIP_CULL) +      conditional = BRW_CONDITIONAL_GE; +   else +      conditional = BRW_CONDITIONAL_L; + +   brw_CMP(p, +	   vec1(brw_null_reg()), +	   conditional, +	   get_element(c->reg.dir, 2), +	   brw_imm_f(0)); +    +   ccw = brw_IF(p, BRW_EXECUTE_1); +   { +      brw_clip_kill_thread(c); +   } +   brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *ccw; +   unsigned conditional; + +   /* Do we have any colors to copy?  +    */ +   if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && +       !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) +      return; + +   /* In some wierd degnerate cases we can end up testing the +    * direction twice, once for culling and once for bfc copying.  Oh +    * well, that's what you get for setting wierd GL state. 
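/*
 * Aside -- illustrative sketch only, not part of this diff.  compute_offset()
 * a little further below evaluates the polygon offset per triangle from the
 * direction vector: the larger of the depth slopes |dz/dx| and |dz/dy|
 * scaled by offset_factor, plus offset_units.  Plain-C equivalent; the
 * function name is hypothetical.
 */
#include <math.h>

static float polygon_offset(const float dir[4], float factor, float units)
{
   float iz = 1.0f / dir[2];
   float ac = fabsf(dir[0] * iz);    /* |dz/dx| */
   float bc = fabsf(dir[1] * iz);    /* |dz/dy| */
   float m  = ac > bc ? ac : bc;     /* maximum depth slope */

   return m * factor + units;
}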
+    */ +   if (c->key.copy_bfc_ccw) +      conditional = BRW_CONDITIONAL_GE; +   else +      conditional = BRW_CONDITIONAL_L; + +   brw_CMP(p, +	   vec1(brw_null_reg()), +	   conditional, +	   get_element(c->reg.dir, 2), +	   brw_imm_f(0)); +    +   ccw = brw_IF(p, BRW_EXECUTE_1); +   { +      unsigned i; + +      for (i = 0; i < 3; i++) { +	 if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) +	    brw_MOV(p,  +		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), +		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + +	 if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) +	    brw_MOV(p,  +		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), +		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); +      } +   } +   brw_ENDIF(p, ccw); +} + + + + +/* +  float iz	= 1.0 / dir.z; +  float ac	= dir.x * iz; +  float bc	= dir.y * iz; +  offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; +  offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; +  offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg off = c->reg.offset; +   struct brw_reg dir = c->reg.dir; +    +   brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); +   brw_MUL(p, vec2(off), dir, get_element(off, 2)); + +   brw_CMP(p,  +	   vec1(brw_null_reg()),  +	   BRW_CONDITIONAL_GE, +	   brw_abs(get_element(off, 0)),  +	   brw_abs(get_element(off, 1))); + +   brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); +   brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +   brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); +   brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *is_poly; +   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + +   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));  +   brw_CMP(p,  +	   vec1(brw_null_reg()),  +	   BRW_CONDITIONAL_EQ,  +	   tmp0, +	   brw_imm_ud(_3DPRIM_POLYGON)); + +   /* Get away with using reg.vertex because we know that this is not +    * a _3DPRIM_TRISTRIP_REVERSE: +    */ +   is_poly = brw_IF(p, BRW_EXECUTE_1); +   {    +      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); +      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); +      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); +      brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); +      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); +      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); +      brw_set_predicate_control(p, BRW_PREDICATE_NONE); +   } +   brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, +			  struct brw_indirect vert ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); +   struct brw_reg z = get_element(pos, 2); + +   brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, +		       boolean do_offset) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *loop; 
+   struct brw_instruction *draw_edge; +   struct brw_indirect v0 = brw_indirect(0, 0); +   struct brw_indirect v1 = brw_indirect(1, 0); +   struct brw_indirect v0ptr = brw_indirect(2, 0); +   struct brw_indirect v1ptr = brw_indirect(3, 0); + +   /* Need a seperate loop for offset: +    */ +   if (do_offset) { +      brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); +      brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + +      loop = brw_DO(p, BRW_EXECUTE_1); +      { +	 brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); +	 brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); +	     +	 apply_one_offset(c, v0); +	     +	 brw_set_conditionalmod(p, BRW_CONDITIONAL_G); +	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); +      } +      brw_WHILE(p, loop); +   } + +   /* v1ptr = &inlist[nr_verts] +    * *v1ptr = v0 +    */ +   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); +   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); +   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); +   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); +   brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + +   loop = brw_DO(p, BRW_EXECUTE_1); +   { +      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); +      brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); +      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + +      /* draw edge if edgeflag != 0 */ +      brw_CMP(p,  +	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,  +	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]), +	      brw_imm_f(0)); +      draw_edge = brw_IF(p, BRW_EXECUTE_1); +      { +	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); +	 brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); +      } +      brw_ENDIF(p, draw_edge); + +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); +   } +   brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, +			boolean do_offset ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *loop; +   struct brw_instruction *draw_point; + +   struct brw_indirect v0 = brw_indirect(0, 0); +   struct brw_indirect v0ptr = brw_indirect(2, 0); + +   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); +   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + +   loop = brw_DO(p, BRW_EXECUTE_1); +   { +      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); +      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + +      /* draw if edgeflag != 0  +       */ +      brw_CMP(p,  +	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,  +	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]), +	      brw_imm_f(0)); +      draw_point = brw_IF(p, BRW_EXECUTE_1); +      { +	 if (do_offset) +	    apply_one_offset(c, v0); + +	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); +      } +      brw_ENDIF(p, draw_point); + +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); +   } +   brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, +			     unsigned mode,  +			     boolean do_offset ) +{ +   switch (mode) { +   case CLIP_FILL: +      brw_clip_tri_emit_polygon(c); +      break; + +   case CLIP_LINE: +      emit_lines(c, do_offset); +    
  break; + +   case CLIP_POINT: +      emit_points(c, do_offset); +      break; + +   case CLIP_CULL: +      assert(0); +      break; +   } +}  + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *ccw; + +   /* Direction culling has already been done. +    */ +   if (c->key.fill_ccw != c->key.fill_cw && +       c->key.fill_ccw != CLIP_CULL && +       c->key.fill_cw != CLIP_CULL) +   { +      brw_CMP(p, +	      vec1(brw_null_reg()), +	      BRW_CONDITIONAL_GE, +	      get_element(c->reg.dir, 2), +	      brw_imm_f(0)); +    +      ccw = brw_IF(p, BRW_EXECUTE_1); +      { +	 emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); +      } +      ccw = brw_ELSE(p, ccw); +      { +	 emit_primitives(c, c->key.fill_cw, c->key.offset_cw); +      } +      brw_ENDIF(p, ccw); +   } +   else if (c->key.fill_cw != CLIP_CULL) { +      emit_primitives(c, c->key.fill_cw, c->key.offset_cw); +   } +   else if (c->key.fill_ccw != CLIP_CULL) {  +      emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); +   } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *if_insn; + +   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));       +   if_insn = brw_IF(p, BRW_EXECUTE_1); +   { +      brw_clip_kill_thread(c); +   } +   brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *do_clip; +    + +   c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || +			(c->key.fill_ccw != c->key.fill_cw) || +			c->key.fill_ccw == CLIP_CULL || +			c->key.fill_cw == CLIP_CULL || +			c->key.copy_bfc_cw || +			c->key.copy_bfc_ccw); + +   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); +   brw_clip_tri_init_vertices(c); + +   assert(c->offset[VERT_RESULT_EDGE]); + +   if (c->key.fill_ccw == CLIP_CULL && +       c->key.fill_cw == CLIP_CULL) { +      brw_clip_kill_thread(c); +      return; +   } + +   merge_edgeflags(c); + +   /* Need to use the inlist indirection here:  +    */ +   if (c->need_direction)  +      compute_tri_direction(c); +    +   if (c->key.fill_ccw == CLIP_CULL || +       c->key.fill_cw == CLIP_CULL) +      cull_direction(c); + +   if (c->key.offset_ccw || +       c->key.offset_cw) +      compute_offset(c); + +   if (c->key.copy_bfc_ccw || +       c->key.copy_bfc_cw) +      copy_bfc(c); + +   /* Need to do this whether we clip or not: +    */ +   if (c->key.do_flat_shading) +      brw_clip_tri_flat_shade(c); +    +   brw_clip_init_clipmask(c); +   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); +   do_clip = brw_IF(p, BRW_EXECUTE_1); +   { +      brw_clip_init_planes(c); +      brw_clip_tri(c); +      check_nr_verts(c); +   } +   brw_ENDIF(p, do_clip); +    +   emit_unfilled_primitives(c); +   brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c new file mode 100644 index 0000000000..6d58ceafff --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_util.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
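/*
 * Aside -- illustrative sketch only, not part of this diff.
 * brw_clip_interp_vertex() below (its body is currently under #if 0) copies
 * the vertex header, then computes new = a0 + t * (a1 - a0) for each 16-byte
 * attribute slot, with the edge flag either forced to 1 or copied rather
 * than interpolated, and finally re-projects the new position via
 * brw_clip_project_vertex().  Plain-C equivalent of the interpolation step
 * only; names and parameters are assumptions of this sketch.
 */
static void interp_clip_attrs(float (*dst)[4],
                              const float (*v0)[4], const float (*v1)[4],
                              float t, unsigned nr_attrs,
                              int edge_slot, int force_edgeflag)
{
   unsigned i, j;

   for (i = 0; i < nr_attrs; i++) {
      if ((int)i == edge_slot) {
         /* Edge flags are not blended: force to 1 on a new clip edge, or
          * inherit from the 'from' vertex. */
         dst[i][0] = force_edgeflag ? 1.0f : v0[i][0];
      }
      else {
         for (j = 0; j < 4; j++)
            dst[i][j] = v0[i][j] + t * (v1[i][j] - v0[i][j]);
      }
   }
}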
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ +   struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + +   if (++c->last_tmp > c->prog_data.total_grf) +      c->prog_data.total_grf = c->last_tmp; + +   return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ +   if (tmp.nr == c->last_tmp-1) +      c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w) +{ +   return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; + +   if (!c->key.nr_userclip) { +      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1)); +      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1)); +      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1)); +      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1)); +      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1)); +      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1)); +   } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ +   struct brw_compile *p = &c->func; + +   /* calc rhw +    */ +   brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + +   /* value.xyz *= value.rhw +    */ +   brw_set_access_mode(p, BRW_ALIGN_16); +   brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); +   brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, +				     struct brw_indirect vert_addr ) +{ +#if 0 +   struct brw_compile *p = &c->func; +   struct brw_reg tmp = get_tmp(c); + +   /* Fixup position.  
Extract from the original vertex and re-project +    * to screen space: +    */ +   brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); +   brw_clip_project_position(c, tmp); +   brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + +   release_tmp(c, tmp); +#else +         #warning "disabled" +#endif +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, +			     struct brw_indirect dest_ptr, +			     struct brw_indirect v0_ptr, /* from */ +			     struct brw_indirect v1_ptr, /* to */ +			     struct brw_reg t0, +			     boolean force_edgeflag) +{ +#if 0 +   struct brw_compile *p = &c->func; +   struct brw_reg tmp = get_tmp(c); +   unsigned i; + +   /* Just copy the vertex header: +    */ +   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + +   /* Iterate over each attribute (could be done in pairs?) +    */ +   for (i = 0; i < c->nr_attrs; i++) { +      unsigned delta = i*16 + 32; + +      if (delta == c->offset[VERT_RESULT_EDGE]) { +	 if (force_edgeflag) +	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); +	 else +	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); +      } +      else { +	 /* Interpolate: +	  * +	  *        New = attr0 + t*attr1 - t*attr0 +	  */ +	 brw_MUL(p, +		 vec4(brw_null_reg()), +		 deref_4f(v1_ptr, delta), +		 t0); + +	 brw_MAC(p, +		 tmp, +		 negate(deref_4f(v0_ptr, delta)), +		 t0); + +	 brw_ADD(p, +		 deref_4f(dest_ptr, delta), +		 deref_4f(v0_ptr, delta), +		 tmp); +      } +   } + +   if (i & 1) { +      unsigned delta = i*16 + 32; +      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); +   } + +   release_tmp(c, tmp); + +   /* Recreate the projected (NDC) coordinate in the new vertex +    * header: +    */ +   brw_clip_project_vertex(c, dest_ptr ); +#else +         #warning "disabled" +#endif +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, +		       struct brw_indirect vert, +		       boolean allocate, +		       boolean eot, +		       unsigned header) +{ +   struct brw_compile *p = &c->func; +   unsigned start = c->last_mrf; + +   assert(!(allocate && eot)); + +   /* Cycle through mrf regs - probably futile as we have to wait for +    * the allocation response anyway.  Also, the order this function +    * is invoked doesn't correspond to the order the instructions will +    * be executed, so it won't have any effect in many cases. +    */ +#if 0 +   if (start + c->nr_regs + 1 >= MAX_MRF) +      start = 0; + +   c->last_mrf = start + c->nr_regs + 1; +#endif + +   /* Copy the vertex from vertn into m1..mN+1: +    */ +   brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + +   /* Overwrite PrimType and PrimStart in the message header, for +    * each vertex in turn: +    */ +   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + +   /* Send each vertex as a seperate write to the urb.  This +    * is different to the concept in brw_sf_emit.c, where +    * subsequent writes are used to build up a single urb +    * entry.  Each of these writes instantiates a seperate +    * urb entry - (I think... what about 'allocate'?) +    */ +   brw_urb_WRITE(p, +		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), +		 start, +		 c->reg.R0, +		 allocate, +		 1,		/* used */ +		 c->nr_regs + 1, /* msg length */ +		 allocate ? 
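+		 /* with 'allocate' set the write apparently returns the handle
+		  * of the newly allocated urb entry (into c->reg.R0 above),
+		  * hence one response register:
+		  */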
1 : 0, /* response_length */ +		 eot,		/* eot */ +		 1,		/* writes_complete */ +		 0,		/* urb offset */ +		 BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ +   struct brw_compile *p = &c->func; + +   /* Send an empty message to kill the thread and release any +    * allocated urb entry: +    */ +   brw_urb_WRITE(p, +		 retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), +		 0, +		 c->reg.R0, +		 0,		/* allocate */ +		 0,		/* used */ +		 0, 		/* msg len */ +		 0, 		/* response len */ +		 1, 		/* eot */ +		 1,		/* writes complete */ +		 0, +		 BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ +   return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ +   if (c->key.nr_userclip) { +      return brw_imm_uw(16); +   } +   else { +      return brw_imm_uw(4); +   } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. + */ +void brw_clip_copy_colors( struct brw_clip_compile *c, +			   unsigned to, unsigned from ) +{ +#if 0 +   struct brw_compile *p = &c->func; + +   if (c->offset[VERT_RESULT_COL0]) +      brw_MOV(p, +	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), +	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + +   if (c->offset[VERT_RESULT_COL1]) +      brw_MOV(p, +	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), +	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + +   if (c->offset[VERT_RESULT_BFC0]) +      brw_MOV(p, +	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), +	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + +   if (c->offset[VERT_RESULT_BFC1]) +      brw_MOV(p, +	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), +	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); +#else +         #warning "disabled" +#endif +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + +   /* Shift so that lowest outcode bit is rightmost: +    */ +   brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + +   if (c->key.nr_userclip) { +      struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + +      /* Rearrange userclip outcodes so that they come directly after +       * the fixed plane bits. +       */ +      brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); +      brw_SHR(p, tmp, tmp, brw_imm_ud(8)); +      brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + +      release_tmp(c, tmp); +   } +} + diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c new file mode 100644 index 0000000000..c74cbf8d73 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -0,0 +1,114 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_vs.h" +#include "brw_tex_layout.h" +#include "brw_winsys.h" + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" +#include "pipe/p_screen.h" + + +#ifndef BRW_DEBUG +int BRW_DEBUG = (0); +#endif + + +static void brw_destroy(struct pipe_context *pipe) +{ +   struct brw_context *brw = brw_context(pipe); + +   if(brw->winsys->destroy) +      brw->winsys->destroy(brw->winsys); +    +   FREE(brw); +} + + +static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, +                      unsigned clearValue) +{ +   int x, y, w, h; +   /* FIXME: corny... */ + +   x = 0; +   y = 0; +   w = ps->width; +   h = ps->height; + +   pipe->surface_fill(pipe, ps, x, y, w, h, clearValue); +} + + +struct pipe_context *brw_create(struct pipe_screen *screen, +                                struct brw_winsys *brw_winsys, +                                unsigned pci_id) +{ +   struct brw_context *brw; + +   debug_printf("%s: creating brw_context with pci id 0x%x\n", +                __FUNCTION__, pci_id); + +   brw = CALLOC_STRUCT(brw_context); +   if (brw == NULL) +      return NULL; + +   brw->winsys = brw_winsys; +   brw->pipe.winsys = screen->winsys; +   brw->pipe.screen = screen; + +   brw->pipe.destroy = brw_destroy; +   brw->pipe.clear = brw_clear; + +   brw_init_surface_functions(brw); +   brw_init_texture_functions(brw); +   brw_init_state_functions(brw); +   brw_init_flush_functions(brw); +   brw_init_draw_functions( brw ); + + +   brw_init_state( brw ); + +   brw->pci_id = pci_id; +   brw->dirty = ~0; +   brw->hardware_dirty = ~0; + +   memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); + +   return &brw->pipe; +} + diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h new file mode 100644 index 0000000000..3079485180 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -0,0 +1,684 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "tgsi/tgsi_scan.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer.  A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry.  An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry.  An urb entry holding a vertex and usually + * a vertex header.  The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry.  An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file.  One of several register files + * addressable by programmed threads.  The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned.  The registers are individually 8 dwords wide and suitable + * for general usage.  Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file.  Threads communicate (and terminate) + * by sending messages.  Message parameters are placed in contigous + * MRF registers.  All program output is via these messages.  URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0.  Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware.  Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer.  Notional first unit, little software + * interaction.  Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader.  
The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs.  If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader.  This corresponds to a new DX10 concept.  If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units.  The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses.  This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more.  Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper.  Mesa's clipping algorithms are imported to run on + * this unit.  The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization.  Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower.  Interpolation of vertex attributes performed here. + * Fragment shader implemented here.  SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator.  No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE                    (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT                0x1 +#define BRW_NEW_RASTERIZER              0x2 +#define BRW_NEW_FS                      0x4 +#define BRW_NEW_BLEND                   0x8 +#define BRW_NEW_CLIP                    0x10 +#define BRW_NEW_SCISSOR                 0x20 +#define BRW_NEW_STIPPLE                 0x40 +#define BRW_NEW_FRAMEBUFFER             0x80 +#define BRW_NEW_ALPHA_TEST              0x100 +#define BRW_NEW_DEPTH_STENCIL           0x200 +#define BRW_NEW_SAMPLER                 0x400 +#define BRW_NEW_TEXTURE                 0x800 +#define BRW_NEW_CONSTANTS               0x1000 +#define BRW_NEW_VBO                     0x2000 +#define BRW_NEW_VS                      0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE               0x10000 +#define BRW_NEW_PSP                     0x20000 +#define BRW_NEW_CURBE_OFFSETS           0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE       0x80000 +#define BRW_NEW_PRIMITIVE               0x100000 +#define BRW_NEW_SCENE                 0x200000 +#define BRW_NEW_SF_LINKAGE              0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE	0x1 +#define DEBUG_STATE	0x2 +#define DEBUG_IOCTL	0x4 +#define DEBUG_PRIMS	0x8 +#define DEBUG_VERTS	0x10 +#define DEBUG_FALLBACKS	0x20 +#define DEBUG_VERBOSE	0x40 +#define DEBUG_DRI       0x80 +#define DEBUG_DMA       0x100 +#define DEBUG_SANITY    0x200 +#define DEBUG_SYNC      0x400 +#define DEBUG_SLEEP     0x800 +#define DEBUG_PIXEL     0x1000 +#define DEBUG_STATS     0x2000 +#define DEBUG_TILE      0x4000 +#define DEBUG_SINGLE_THREAD   0x8000 +#define DEBUG_WM        0x10000 +#define DEBUG_URB       0x20000 +#define 
DEBUG_VS        0x40000 +#define DEBUG_BATCH	0x80000 +#define DEBUG_BUFMGR	0x100000 +#define DEBUG_BLIT	0x200000 +#define DEBUG_REGION	0x400000 +#define DEBUG_MIPTREE	0x800000 + +#define DBG(...) do {						\ +   if (BRW_DEBUG & FILE_DEBUG_FLAG)				\ +      debug_printf(__VA_ARGS__);				\ +} while(0) + +#define PRINT(...) do {						\ +   debug_printf(__VA_ARGS__);			                \ +} while(0) + +struct brw_state_flags { +   unsigned cache; +   unsigned brw; +}; + + +struct brw_vertex_program { +   struct pipe_shader_state program; +   struct tgsi_shader_info info; +   int id; +}; + + +struct brw_fragment_program { +   struct pipe_shader_state program; +   struct tgsi_shader_info info; +    +   boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ +   int id; +}; + + +struct pipe_setup_linkage { +   struct { +      unsigned vp_output:5; +      unsigned interp_mode:4; +      unsigned bf_vp_output:5; +   } fp_input[PIPE_MAX_SHADER_INPUTS]; + +   unsigned fp_input_count:5; +   unsigned max_vp_output:5; +}; +    + + +struct brw_texture { +   struct pipe_texture base; + +   /* Derived from the above: +    */ +   unsigned stride; +   unsigned depth_pitch;          /* per-image on i945? */ +   unsigned total_nblocksy; + +   unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + +   /* Explicitly store the offset of each image for each cube face or +    * depth value.  Pretty much have to accept that hardware formats +    * are going to be so diverse that there is no unified way to +    * compute the offsets of depth/cube images within a mipmap level, +    * so have to store them as a lookup table: +    */ +   unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];   /**< array [depth] of offsets */ + +   /* Includes image offset tables: +    */ +   unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + +   /* The data is held here: +    */ +   struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program.  Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program.  Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { +   unsigned curb_read_length; +   unsigned urb_read_length; + +   unsigned first_curbe_grf; +   unsigned total_grf; +   unsigned total_scratch; + +   /* Internally generated constants for the CURBE.  These are loaded +    * ahead of the data from the constant buffer. +    */ +   const float internal_const[8]; +   unsigned nr_internal_consts; +   unsigned max_const; + +   boolean error; +}; + +struct brw_sf_prog_data { +   unsigned urb_read_length; +   unsigned total_grf; + +   /* Each vertex may have upto 12 attributes, 4 components each, +    * except WPOS which requires only 2.  (11*4 + 2) == 44 ==> 11 +    * rows. +    * +    * Actually we use 4 for each, so call it 12 rows. +    */ +   unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { +   unsigned curb_read_length;	/* user planes? 
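+					      (presumably the user clip planes, delivered to the clip thread via the CURBE - see brw_curbe.c)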
*/ +   unsigned clip_mode; +   unsigned urb_read_length; +   unsigned total_grf; +}; + +struct brw_gs_prog_data { +   unsigned urb_read_length; +   unsigned total_grf; +}; + +struct brw_vs_prog_data { +   unsigned curb_read_length; +   unsigned urb_read_length; +   unsigned total_grf; +   unsigned outputs_written; + +   unsigned inputs_read; + +   unsigned max_const; + +   float    imm_buf[PIPE_MAX_CONSTANT][4]; +   unsigned num_imm; +   unsigned num_consts; + +   /* Used for calculating urb partitions: +    */ +   unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { +   unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { +   struct pipe_buffer *buffer; + +   unsigned size; +   unsigned offset;		/* offset of first free byte */ + +   struct brw_context *brw; +}; + +struct brw_cache_item { +   unsigned hash; +   unsigned key_size;		/* for variable-sized keys */ +   const void *key; + +   unsigned offset;		/* offset within pool's buffer */ +   unsigned data_size; + +   struct brw_cache_item *next; +}; + + + +struct brw_cache { +   unsigned id; + +   const char *name; + +   struct brw_context *brw; +   struct brw_mem_pool *pool; + +   struct brw_cache_item **items; +   unsigned size, n_items; + +   unsigned key_size;		/* for fixed-size keys */ +   unsigned aux_size; + +   unsigned last_addr;			/* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime.  Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { +   struct brw_state_flags dirty; +   void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR  (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP                  (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG              (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE                (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND              (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { +   BRW_GS_POOL, +   BRW_SS_POOL, +   BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { +   struct header *header; +   unsigned sz; +   struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where PIPE_MAX_ATTRIBS > 32.  Wouldn't life + * be easier if C allowed arrays of packed elements? 
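+ *
+ * (With PIPE_MAX_ATTRIBS currently 32, ATTRIB_BIT_DWORDS below works out
+ * to 1: one dword of 1-bit 'varying' flags and two dwords of 2-bit size
+ * fields in struct brw_vertex_info.)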
+ */ +#define ATTRIB_BIT_DWORDS  ((PIPE_MAX_ATTRIBS+31)/32) + + + + +struct brw_vertex_info { +   unsigned varying;  /* varying:1[PIPE_MAX_ATTRIBS] */ +   unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ +}; + + + + + +struct brw_context +{ +   struct pipe_context pipe; +   struct brw_winsys *winsys; + +   unsigned primitive; +   unsigned reduced_primitive; + +   boolean emit_state_always; + +   struct { +      struct brw_state_flags dirty; +   } state; + + +   struct { +      const struct pipe_blend_state         *Blend; +      const struct pipe_depth_stencil_alpha_state *DepthStencil; +      const struct pipe_poly_stipple        *PolygonStipple; +      const struct pipe_rasterizer_state    *Raster; +      const struct pipe_sampler_state       *Samplers[PIPE_MAX_SAMPLERS]; +      const struct brw_vertex_program       *VertexProgram; +      const struct brw_fragment_program     *FragmentProgram; + +      struct pipe_clip_state          Clip; +      struct pipe_blend_color         BlendColor; +      struct pipe_scissor_state       Scissor; +      struct pipe_viewport_state      Viewport; +      struct pipe_framebuffer_state   FrameBuffer; + +      const struct pipe_constant_buffer *Constants[2]; +      const struct brw_texture          *Texture[PIPE_MAX_SAMPLERS]; +   } attribs; + +   unsigned num_samplers; +   unsigned num_textures; + +   struct brw_mem_pool pool[BRW_MAX_POOL]; +   struct brw_cache cache[BRW_MAX_CACHE]; +   struct brw_cached_batch_item *cached_batch_items; + +   struct { + +      /* Arrays with buffer objects to copy non-bufferobj arrays into +       * for upload: +       */ +      const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; + +      struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + +      /* Summary of size and varying of active arrays, so we can check +       * for changes to this state: +       */ +      struct brw_vertex_info info; +   } vb; + + +   unsigned hardware_dirty; +   unsigned dirty; +   unsigned pci_id; +   /* BRW_NEW_URB_ALLOCATIONS: +    */ +   struct { +      unsigned vsize;		/* vertex size plus header in urb registers */ +      unsigned csize;		/* constant buffer size in urb registers */ +      unsigned sfsize;		/* setup data size in urb registers */ + +      boolean constrained; + +      unsigned nr_vs_entries; +      unsigned nr_gs_entries; +      unsigned nr_clip_entries; +      unsigned nr_sf_entries; +      unsigned nr_cs_entries; + +/*       unsigned vs_size; */ +/*       unsigned gs_size; */ +/*       unsigned clip_size; */ +/*       unsigned sf_size; */ +/*       unsigned cs_size; */ + +      unsigned vs_start; +      unsigned gs_start; +      unsigned clip_start; +      unsigned sf_start; +      unsigned cs_start; +   } urb; + + +   /* BRW_NEW_CURBE_OFFSETS: +    */ +   struct { +      unsigned wm_start; +      unsigned wm_size; +      unsigned clip_start; +      unsigned clip_size; +      unsigned vs_start; +      unsigned vs_size; +      unsigned total_size; + +      unsigned gs_offset; + +      float *last_buf; +      unsigned last_bufsz; +   } curbe; + +   struct { +      struct brw_vs_prog_data *prog_data; + +      unsigned prog_gs_offset; +      unsigned state_gs_offset; +   } vs; + +   struct { +      struct brw_gs_prog_data *prog_data; + +      boolean prog_active; +      unsigned prog_gs_offset; +      unsigned state_gs_offset; +   } gs; + +   struct { +      struct brw_clip_prog_data *prog_data; + +      unsigned 
prog_gs_offset; +      unsigned vp_gs_offset; +      unsigned state_gs_offset; +   } clip; + + +   struct { +      struct brw_sf_prog_data *prog_data; + +      struct pipe_setup_linkage linkage; + +      unsigned prog_gs_offset; +      unsigned vp_gs_offset; +      unsigned state_gs_offset; +   } sf; + +   struct { +      struct brw_wm_prog_data *prog_data; + +//      struct brw_wm_compiler *compile_data; + + +      /** +       * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER +       * cache +       */ +      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + +      unsigned render_surf; +      unsigned nr_surfaces; + +      unsigned max_threads; +      struct pipe_buffer *scratch_buffer; +      unsigned scratch_buffer_size; + +      unsigned sampler_count; +      unsigned sampler_gs_offset; + +      struct brw_surface_binding_table bind; +      unsigned bind_ss_offset; + +      unsigned prog_gs_offset; +      unsigned state_gs_offset; +   } wm; + + +   struct { +      unsigned vp_gs_offset; +      unsigned state_gs_offset; +   } cc; + + +   /* Used to give every program string a unique id +    */ +   unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a)  ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, +		   unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions.  These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ +   return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c new file mode 100644 index 0000000000..904cde8e30 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -0,0 +1,369 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_batch.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "pipe/p_state.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define FILE_DEBUG_FLAG DEBUG_FALLBACKS + +/* Partition the CURBE between the various users of constant values: + */ +static void calculate_curbe_offsets( struct brw_context *brw ) +{ +   /* CACHE_NEW_WM_PROG */ +   unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); + +   /* BRW_NEW_VERTEX_PROGRAM */ +   unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); +   unsigned nr_clip_regs = 0; +   unsigned total_regs; + +#if 0 +   /* BRW_NEW_CLIP ? */ +   if (brw->attribs.Transform->ClipPlanesEnabled) { +      unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); +      nr_clip_regs = align(nr_planes * 4, 16); +   } +#endif + + +   total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + +   /* This can happen - what to do?  Probably rather than falling +    * back, the best thing to do is emit programs which code the +    * constants as immediate values.  Could do this either as a static +    * cap on WM and VS, or adaptively. +    * +    * Unfortunately, this is currently dependent on the results of the +    * program generation process (in the case of wm), so this would +    * introduce the need to re-generate programs in the event of a +    * curbe allocation failure. +    */ +   /* Max size is 32 - just large enough to +    * hold the 128 parameters allowed by +    * the fragment and vertex program +    * api's.  It's not clear what happens +    * when both VP and FP want to use 128 +    * parameters, though. 
+    */ +   assert(total_regs <= 32); + +   /* Lazy resize: +    */ +   if (nr_fp_regs > brw->curbe.wm_size || +       nr_vp_regs > brw->curbe.vs_size || +       nr_clip_regs != brw->curbe.clip_size || +       (total_regs < brw->curbe.total_size / 4 && +	brw->curbe.total_size > 16)) { + +      unsigned reg = 0; + +      /* Calculate a new layout: +       */ +      reg = 0; +      brw->curbe.wm_start = reg; +      brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; +      brw->curbe.clip_start = reg; +      brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; +      brw->curbe.vs_start = reg; +      brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; +      brw->curbe.total_size = reg; + +#if 0 +      if (0) +	 DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n", +		      brw->curbe.wm_start, +		      brw->curbe.wm_size, +		      brw->curbe.clip_start, +		      brw->curbe.clip_size, +		      brw->curbe.vs_start, +		      brw->curbe.vs_size ); +#endif + +      brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; +   } +} + + +const struct brw_tracked_state brw_curbe_offsets = { +   .dirty = { +      .brw  = (BRW_NEW_CLIP | +	       BRW_NEW_VS), +      .cache = CACHE_NEW_WM_PROG +   }, +   .update = calculate_curbe_offsets +}; + + + +/* Define the number of curbes within CS's urb allocation.  Multiple + * urb entries -> multiple curbes.  These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. + */ +void brw_upload_constant_buffer_state(struct brw_context *brw) +{ +   struct brw_constant_buffer_state cbs; +   memset(&cbs, 0, sizeof(cbs)); + +   /* It appears that this is the state packet for the CS unit, ie. the +    * urb entries detailed here are housed in the CS range from the +    * URB_FENCE command. +    */ +   cbs.header.opcode = CMD_CONST_BUFFER_STATE; +   cbs.header.length = sizeof(cbs)/4 - 2; + +   /* BRW_NEW_URB_FENCE */ +   cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; +   cbs.bits0.urb_entry_size = brw->urb.csize - 1; + +   assert(brw->urb.nr_cs_entries); +   BRW_CACHED_BATCH_STRUCT(brw, &cbs); +} + + +static float fixed_plane[6][4] = { +   { 0,    0,   -1, 1 }, +   { 0,    0,    1, 1 }, +   { 0,   -1,    0, 1 }, +   { 0,    1,    0, 1 }, +   {-1,    0,    0, 1 }, +   { 1,    0,    0, 1 } +}; + +/* Upload a new set of constants.  Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. 
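+ *
+ * (The code below does in fact memcmp() the freshly built block against
+ * curbe.last_buf and skips re-copying the data into the pool when it is
+ * unchanged, though the CONST_BUFFER packet itself is still emitted.)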
+ */ +static void upload_constant_buffer(struct brw_context *brw) +{ +   struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; +   unsigned sz = brw->curbe.total_size; +   unsigned bufsz = sz * sizeof(float); +   float *buf; +   unsigned i; + + +   if (sz == 0) { +      struct brw_constant_buffer cb; +      cb.header.opcode = CMD_CONST_BUFFER; +      cb.header.length = sizeof(cb)/4 - 2; +      cb.header.valid = 0; +      cb.bits0.buffer_length = 0; +      cb.bits0.buffer_address = 0; +      BRW_BATCH_STRUCT(brw, &cb); + +      if (brw->curbe.last_buf) { +	 free(brw->curbe.last_buf); +	 brw->curbe.last_buf = NULL; +	 brw->curbe.last_bufsz  = 0; +      } + +      return; +   } + +   buf = (float *)malloc(bufsz); + +   memset(buf, 0, bufsz); + +   if (brw->curbe.wm_size) { +      unsigned offset = brw->curbe.wm_start * 16; + +      /* First the constant buffer constants: +       */ +       +      /* Then any internally generated constants:  +       */ +      for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) +	 buf[offset + i] = brw->wm.prog_data->internal_const[i]; + +      assert(brw->wm.prog_data->max_const ==  +	     brw->wm.prog_data->nr_internal_consts); +   } + + +   /* The clipplanes are actually delivered to both CLIP and VS units. +    * VS uses them to calculate the outcode bitmasks. +    */ +   if (brw->curbe.clip_size) { +      unsigned offset = brw->curbe.clip_start * 16; +      unsigned j; + +      /* If any planes are going this way, send them all this way: +       */ +      for (i = 0; i < 6; i++) { +	 buf[offset + i * 4 + 0] = fixed_plane[i][0]; +	 buf[offset + i * 4 + 1] = fixed_plane[i][1]; +	 buf[offset + i * 4 + 2] = fixed_plane[i][2]; +	 buf[offset + i * 4 + 3] = fixed_plane[i][3]; +      } + +      /* Clip planes: BRW_NEW_CLIP: +       */ +      for (j = 0; j < brw->attribs.Clip.nr; j++) { +	 buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; +	 buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; +	 buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; +	 buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; +	 i++; +      } +   } + + +   if (brw->curbe.vs_size) { +      unsigned offset = brw->curbe.vs_start * 16; +      /*unsigned nr = vp->max_const;*/ +      const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; +      struct pipe_winsys *ws = brw->pipe.winsys; +      /* FIXME: buffer size is num_consts + num_immediates */ +      if (brw->vs.prog_data->num_consts) { +         /* map the vertex constant buffer and copy to curbe: */ +         void *data = ws->buffer_map(ws, cbuffer->buffer, 0); +         /* FIXME: this is wrong. the cbuffer->buffer->size currently +          * represents size of consts + immediates. so if we'll +          * have both we'll copy over the end of the buffer +          * with the subsequent memcpy */ +         memcpy(&buf[offset], data, cbuffer->buffer->size); +         ws->buffer_unmap(ws, cbuffer->buffer); +         offset += cbuffer->buffer->size; +      } +      /*immediates*/ +      if (brw->vs.prog_data->num_imm) { +         memcpy(&buf[offset], brw->vs.prog_data->imm_buf, +                brw->vs.prog_data->num_imm * 4 * sizeof(float)); +      } +   } + +   if (1) { +      for (i = 0; i < sz; i+=4) +	 debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, +		      buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + +      debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", +		   brw->curbe.last_buf, buf, +		   bufsz, brw->curbe.last_bufsz, +		   brw->curbe.last_buf ? 
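+		   /* -1 in the debug output means no previous buffer to compare */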
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); +   } + +   if (brw->curbe.last_buf && +       bufsz == brw->curbe.last_bufsz && +       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { +      free(buf); +/*       return; */ +   } +   else { +      if (brw->curbe.last_buf) +	 free(brw->curbe.last_buf); +      brw->curbe.last_buf = buf; +      brw->curbe.last_bufsz = bufsz; + + +      if (!brw_pool_alloc(pool, +			  bufsz, +			  1 << 6, +			  &brw->curbe.gs_offset)) { +	 debug_printf("out of GS memory for curbe\n"); +	 assert(0); +	 return; +      } + + +      /* Copy data to the buffer: +       */ +      brw->winsys->buffer_subdata_typed(brw->winsys, +					pool->buffer,  +					brw->curbe.gs_offset,  +					bufsz,  +					buf, +					BRW_CONSTANT_BUFFER ); +   } + +   /* TODO: only emit the constant_buffer packet when necessary, ie: +      - contents have changed +      - offset has changed +      - hw requirements due to other packets emitted. +   */ +   { +      struct brw_constant_buffer cb; + +      memset(&cb, 0, sizeof(cb)); + +      cb.header.opcode = CMD_CONST_BUFFER; +      cb.header.length = sizeof(cb)/4 - 2; +      cb.header.valid = 1; +      cb.bits0.buffer_length = sz - 1; +      cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; + +      /* Because this provokes an action (ie copy the constants into the +       * URB), it shouldn't be shortcircuited if identical to the +       * previous time - because eg. the urb destination may have +       * changed, or the urb contents different to last time. +       * +       * Note that the data referred to is actually copied internally, +       * not just used in place according to passed pointer. +       * +       * It appears that the CS unit takes care of using each available +       * URB entry (Const URB Entry == CURBE) in turn, and issuing +       * flushes as necessary when doublebuffering of CURBEs isn't +       * possible. +       */ +      BRW_BATCH_STRUCT(brw, &cb); +   } +} + +/* This tracked state is unique in that the state it monitors varies + * dynamically depending on the parameters tracked by the fragment and + * vertex programs.  This is the template used as a starting point, + * each context will maintain a copy of this internally and update as + * required. + */ +const struct brw_tracked_state brw_constant_buffer = { +   .dirty = { +      .brw  = (BRW_NEW_CLIP | +	       BRW_NEW_CONSTANTS | +	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ +	       BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ +	       BRW_NEW_CURBE_OFFSETS), +      .cache = (CACHE_NEW_WM_PROG) +   }, +   .update = upload_constant_buffer +}; + diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h new file mode 100644 index 0000000000..9379a397f6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_defines.h @@ -0,0 +1,852 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* + */ +#define MI_NOOP                              0x00 +#define MI_USER_INTERRUPT                    0x02 +#define MI_WAIT_FOR_EVENT                    0x03 +#define MI_FLUSH                             0x04 +#define MI_REPORT_HEAD                       0x07 +#define MI_ARB_ON_OFF                        0x08 +#define MI_BATCH_BUFFER_END                  0x0A +#define MI_OVERLAY_FLIP                      0x11 +#define MI_LOAD_SCAN_LINES_INCL              0x12 +#define MI_LOAD_SCAN_LINES_EXCL              0x13 +#define MI_DISPLAY_BUFFER_INFO               0x14 +#define MI_SET_CONTEXT                       0x18 +#define MI_STORE_DATA_IMM                    0x20 +#define MI_STORE_DATA_INDEX                  0x21 +#define MI_LOAD_REGISTER_IMM                 0x22 +#define MI_STORE_REGISTER_MEM                0x24 +#define MI_BATCH_BUFFER_START                0x31 + +#define MI_SYNCHRONOUS_FLIP                  0x0 +#define MI_ASYNCHRONOUS_FLIP                 0x1 + +#define MI_BUFFER_SECURE                     0x0 +#define MI_BUFFER_NONSECURE                  0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS         0x0 +#define MI_ARBITRATE_BETWEEN_INSTS           0x1 +#define MI_NO_ARBITRATION                    0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED      0x0 +#define MI_CONDITION_CODE_WAIT_0             0x1 +#define MI_CONDITION_CODE_WAIT_1             0x2 +#define MI_CONDITION_CODE_WAIT_2             0x3 +#define MI_CONDITION_CODE_WAIT_3             0x4 +#define MI_CONDITION_CODE_WAIT_4             0x5 + +#define MI_DISPLAY_PIPE_A                    0x0 +#define MI_DISPLAY_PIPE_B                    0x1 + +#define MI_DISPLAY_PLANE_A                   0x0 +#define MI_DISPLAY_PLANE_B                   0x1 +#define MI_DISPLAY_PLANE_C                   0x2 + +#define MI_STANDARD_FLIP                                 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD   0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE     0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER     0x3 + +#define MI_PHYSICAL_ADDRESS                  0x0 +#define MI_VIRTUAL_ADDRESS                   0x1 + +#define MI_BUFFER_MEMORY_MAIN                0x0 +#define MI_BUFFER_MEMORY_GTT                 0x2 
+#define MI_BUFFER_MEMORY_PER_PROCESS_GTT     0x3 + +#define MI_FLIP_CONTINUE                     0x0 +#define MI_FLIP_ON                           0x1 +#define MI_FLIP_OFF                          0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE          0x0 +#define MI_TRUSTED_REGISTER_SPACE            0x1 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED       0x0 +#define _3DOP_3DSTATE_NONPIPELINED    0x1 +#define _3DOP_3DCONTROL               0x2 +#define _3DOP_3DPRIMITIVE             0x3 + +#define _3DSTATE_PIPELINED_POINTERS       0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS   0x01 +#define _3DSTATE_VERTEX_BUFFERS           0x08 +#define _3DSTATE_VERTEX_ELEMENTS          0x09 +#define _3DSTATE_INDEX_BUFFER             0x0A +#define _3DSTATE_VF_STATISTICS            0x0B +#define _3DSTATE_DRAWING_RECTANGLE            0x00 +#define _3DSTATE_CONSTANT_COLOR               0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD         0x02 +#define _3DSTATE_CHROMA_KEY                   0x04 +#define _3DSTATE_DEPTH_BUFFER                 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET          0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN         0x07 +#define _3DSTATE_LINE_STIPPLE                 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09 +#define _3DCONTROL    0x00 +#define _3DPRIMITIVE  0x00 + +#define PIPE_CONTROL_NOWRITE          0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE   0x01 +#define PIPE_CONTROL_WRITEDEPTH       0x02 +#define PIPE_CONTROL_WRITETIMESTAMP   0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01 + +#define _3DPRIM_POINTLIST         0x01 +#define _3DPRIM_LINELIST          0x02 +#define _3DPRIM_LINESTRIP         0x03 +#define _3DPRIM_TRILIST           0x04 +#define _3DPRIM_TRISTRIP          0x05 +#define _3DPRIM_TRIFAN            0x06 +#define _3DPRIM_QUADLIST          0x07 +#define _3DPRIM_QUADSTRIP         0x08 +#define _3DPRIM_LINELIST_ADJ      0x09 +#define _3DPRIM_LINESTRIP_ADJ     0x0A +#define _3DPRIM_TRILIST_ADJ       0x0B +#define _3DPRIM_TRISTRIP_ADJ      0x0C +#define _3DPRIM_TRISTRIP_REVERSE  0x0D +#define _3DPRIM_POLYGON           0x0E +#define _3DPRIM_RECTLIST          0x0F +#define _3DPRIM_LINELOOP          0x10 +#define _3DPRIM_POINTLIST_BF      0x11 +#define _3DPRIM_LINESTRIP_CONT    0x12 +#define _3DPRIM_LINESTRIP_BF      0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     1 + +#define BRW_ANISORATIO_2     0 +#define BRW_ANISORATIO_4     1 +#define BRW_ANISORATIO_6     2 +#define BRW_ANISORATIO_8     3 +#define BRW_ANISORATIO_10    4 +#define BRW_ANISORATIO_12    5 +#define BRW_ANISORATIO_14    6 +#define BRW_ANISORATIO_16    7 + +#define BRW_BLENDFACTOR_ONE                 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR           0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA           0x3 +#define BRW_BLENDFACTOR_DST_ALPHA           0x4 +#define BRW_BLENDFACTOR_DST_COLOR           0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6 +#define BRW_BLENDFACTOR_CONST_COLOR         0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA         0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR          0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA          0x0A +#define BRW_BLENDFACTOR_ZERO                0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR       0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA       0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA       0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR       0x15 +#define 
BRW_BLENDFACTOR_INV_CONST_COLOR     0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA     0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR      0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA      0x1A + +#define BRW_BLENDFUNCTION_ADD               0 +#define BRW_BLENDFUNCTION_SUBTRACT          1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT  2 +#define BRW_BLENDFUNCTION_MIN               3 +#define BRW_BLENDFUNCTION_MAX               4 + +#define BRW_ALPHATEST_FORMAT_UNORM8         0 +#define BRW_ALPHATEST_FORMAT_FLOAT32        1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH  0 +#define BRW_CHROMAKEY_REPLACE_BLACK      1 + +#define BRW_CLIP_API_OGL     0 +#define BRW_CLIP_API_DX      1 + +#define BRW_CLIPMODE_NORMAL              0 +#define BRW_CLIPMODE_CLIP_ALL            1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED   2 +#define BRW_CLIPMODE_REJECT_ALL          3 +#define BRW_CLIPMODE_ACCEPT_ALL          4 + +#define BRW_CLIP_NDCSPACE     0 +#define BRW_CLIP_SCREENSPACE  1 + +#define BRW_COMPAREFUNCTION_ALWAYS       0 +#define BRW_COMPAREFUNCTION_NEVER        1 +#define BRW_COMPAREFUNCTION_LESS         2 +#define BRW_COMPAREFUNCTION_EQUAL        3 +#define BRW_COMPAREFUNCTION_LEQUAL       4 +#define BRW_COMPAREFUNCTION_GREATER      5 +#define BRW_COMPAREFUNCTION_NOTEQUAL     6 +#define BRW_COMPAREFUNCTION_GEQUAL       7 + +#define BRW_COVERAGE_PIXELS_HALF     0 +#define BRW_COVERAGE_PIXELS_1        1 +#define BRW_COVERAGE_PIXELS_2        2 +#define BRW_COVERAGE_PIXELS_4        3 + +#define BRW_CULLMODE_BOTH        0 +#define BRW_CULLMODE_NONE        1 +#define BRW_CULLMODE_FRONT       2 +#define BRW_CULLMODE_BACK        3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM      0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT  1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0 +#define BRW_DEPTHFORMAT_D32_FLOAT                1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT        2 +#define BRW_DEPTHFORMAT_D16_UNORM                5 + +#define BRW_FLOATING_POINT_IEEE_754        0 +#define BRW_FLOATING_POINT_NON_IEEE_754    1 + +#define BRW_FRONTWINDING_CW      0 +#define BRW_FRONTWINDING_CCW     1 + +#define BRW_SPRITE_POINT_ENABLE  16 + +#define BRW_INDEX_BYTE     0 +#define BRW_INDEX_WORD     1 +#define BRW_INDEX_DWORD    2 + +#define BRW_LOGICOPFUNCTION_CLEAR            0 +#define BRW_LOGICOPFUNCTION_NOR              1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED     2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED    3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE      4 +#define BRW_LOGICOPFUNCTION_INVERT           5 +#define BRW_LOGICOPFUNCTION_XOR              6 +#define BRW_LOGICOPFUNCTION_NAND             7 +#define BRW_LOGICOPFUNCTION_AND              8 +#define BRW_LOGICOPFUNCTION_EQUIV            9 +#define BRW_LOGICOPFUNCTION_NOOP             10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED      11 +#define BRW_LOGICOPFUNCTION_COPY             12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE       13 +#define BRW_LOGICOPFUNCTION_OR               14 +#define BRW_LOGICOPFUNCTION_SET              15 + +#define BRW_MAPFILTER_NEAREST        0x0 +#define BRW_MAPFILTER_LINEAR         0x1 +#define BRW_MAPFILTER_ANISOTROPIC    0x2 + +#define BRW_MIPFILTER_NONE        0 +#define BRW_MIPFILTER_NEAREST     1 +#define BRW_MIPFILTER_LINEAR      3 + +#define BRW_POLYGON_FRONT_FACING     0 +#define BRW_POLYGON_BACK_FACING      1 + +#define BRW_PREFILTER_ALWAYS     0x0 +#define BRW_PREFILTER_NEVER      0x1 +#define BRW_PREFILTER_LESS       0x2 +#define BRW_PREFILTER_EQUAL      0x3 +#define BRW_PREFILTER_LEQUAL     0x4 +#define 
BRW_PREFILTER_GREATER    0x5 +#define BRW_PREFILTER_NOTEQUAL   0x6 +#define BRW_PREFILTER_GEQUAL     0x7 + +#define BRW_PROVOKING_VERTEX_0    0 +#define BRW_PROVOKING_VERTEX_1    1 +#define BRW_PROVOKING_VERTEX_2    2 + +#define BRW_RASTRULE_UPPER_LEFT  0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM    0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM    1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT   2 + +#define BRW_STENCILOP_KEEP               0 +#define BRW_STENCILOP_ZERO               1 +#define BRW_STENCILOP_REPLACE            2 +#define BRW_STENCILOP_INCRSAT            3 +#define BRW_STENCILOP_DECRSAT            4 +#define BRW_STENCILOP_INCR               5 +#define BRW_STENCILOP_DECR               6 +#define BRW_STENCILOP_INVERT             7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW   0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT   1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT              0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT              0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM             0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM             0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT                   0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT             0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED           0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED           0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT                0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT                 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT                 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM                0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM                0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED              0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED              0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM             0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM             0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT              0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT              0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT             0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT                   0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT                    0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT                    0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS       0x088 +#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT        0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT                   0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM                   0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM                   0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT                      0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM             0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT             0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT                   0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT                   0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT                   0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED           0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED           0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED                 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED                 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM                 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB            0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM   
           0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB         0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT               0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM       0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM                 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB            0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM                 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT                  0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM                   0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM                   0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT                    0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT                    0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT                   0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM              0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB         0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT                0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT                       0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT                       0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT                      0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS          0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT           0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM                   0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM                    0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM                    0x0E1 +#define BRW_SURFACEFORMAT_A24X8_UNORM                    0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT                      0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT                      0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT                      0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM                 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB            0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM                 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB            0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP             0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM              0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT                   0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM                      0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM                      0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED            0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED               0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED               0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED                 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED                 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED                    0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED                    0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM                   0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB              0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM                 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB            0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM                 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB            0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM                     0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM                     0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT                      0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT                      0x109 +#define BRW_SURFACEFORMAT_R16_UNORM                      0x10A +#define 
BRW_SURFACEFORMAT_R16_SNORM                      0x10B +#define BRW_SURFACEFORMAT_R16_SINT                       0x10C +#define BRW_SURFACEFORMAT_R16_UINT                       0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT                      0x10E +#define BRW_SURFACEFORMAT_I16_UNORM                      0x111 +#define BRW_SURFACEFORMAT_L16_UNORM                      0x112 +#define BRW_SURFACEFORMAT_A16_UNORM                      0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM                     0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT                      0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT                      0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT                      0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM            0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM                 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB            0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED                   0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED                   0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED                    0x11E +#define BRW_SURFACEFORMAT_R16_USCALED                    0x11F +#define BRW_SURFACEFORMAT_R8_UNORM                       0x140 +#define BRW_SURFACEFORMAT_R8_SNORM                       0x141 +#define BRW_SURFACEFORMAT_R8_SINT                        0x142 +#define BRW_SURFACEFORMAT_R8_UINT                        0x143 +#define BRW_SURFACEFORMAT_A8_UNORM                       0x144 +#define BRW_SURFACEFORMAT_I8_UNORM                       0x145 +#define BRW_SURFACEFORMAT_L8_UNORM                       0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM                     0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM                     0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED                     0x149 +#define BRW_SURFACEFORMAT_R8_USCALED                     0x14A +#define BRW_SURFACEFORMAT_R1_UINT                        0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL                   0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY                  0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM                      0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM                      0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM                      0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM                      0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM                      0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB                 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB                 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB                 0x18D +#define BRW_SURFACEFORMAT_MONO8                          0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV                   0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY                    0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB                       0x191 +#define BRW_SURFACEFORMAT_FXT1                           0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM                   0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM                   0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED                 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED                 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT             0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT                0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM                      0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM                      0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM                0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM                0x19D +#define 
BRW_SURFACEFORMAT_R16G16B16_SSCALED              0x19E +#define BRW_SURFACEFORMAT_R16G16B16_USCALED              0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32  0 +#define BRW_SURFACERETURNFORMAT_S1       1 + +#define BRW_SURFACE_1D      0 +#define BRW_SURFACE_2D      1 +#define BRW_SURFACE_3D      2 +#define BRW_SURFACE_CUBE    3 +#define BRW_SURFACE_BUFFER  4 +#define BRW_SURFACE_NULL    7 + +#define BRW_TEXCOORDMODE_WRAP            0 +#define BRW_TEXCOORDMODE_MIRROR          1 +#define BRW_TEXCOORDMODE_CLAMP           2 +#define BRW_TEXCOORDMODE_CUBE            3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER    4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE     5 + +#define BRW_THREAD_PRIORITY_NORMAL   0 +#define BRW_THREAD_PRIORITY_HIGH     1 + +#define BRW_TILEWALK_XMAJOR                 0 +#define BRW_TILEWALK_YMAJOR                 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS  0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS  1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA     0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA   1 + +#define BRW_VFCOMPONENT_NOSTORE      0 +#define BRW_VFCOMPONENT_STORE_SRC    1 +#define BRW_VFCOMPONENT_STORE_0      2 +#define BRW_VFCOMPONENT_STORE_1_FLT  3 +#define BRW_VFCOMPONENT_STORE_1_INT  4 +#define BRW_VFCOMPONENT_STORE_VID    5 +#define BRW_VFCOMPONENT_STORE_IID    6 +#define BRW_VFCOMPONENT_STORE_PID    7 + + + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1   0 +#define BRW_ALIGN_16  1 + +#define BRW_ADDRESS_DIRECT                        0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1 + +#define BRW_CHANNEL_X     0 +#define BRW_CHANNEL_Y     1 +#define BRW_CHANNEL_Z     2 +#define BRW_CHANNEL_W     3 + +#define BRW_COMPRESSION_NONE          0 +#define BRW_COMPRESSION_2NDHALF       1 +#define BRW_COMPRESSION_COMPRESSED    2 + +#define BRW_CONDITIONAL_NONE  0 +#define BRW_CONDITIONAL_Z     1 +#define BRW_CONDITIONAL_NZ    2 +#define BRW_CONDITIONAL_EQ    1	/* Z */ +#define BRW_CONDITIONAL_NEQ   2	/* NZ */ +#define BRW_CONDITIONAL_G     3 +#define BRW_CONDITIONAL_GE    4 +#define BRW_CONDITIONAL_L     5 +#define BRW_CONDITIONAL_LE    6 +#define BRW_CONDITIONAL_C     7 +#define BRW_CONDITIONAL_O     8 + +#define BRW_DEBUG_NONE        0 +#define BRW_DEBUG_BREAKPOINT  1 + +#define BRW_DEPENDENCY_NORMAL         0 +#define BRW_DEPENDENCY_NOTCLEARED     1 +#define BRW_DEPENDENCY_NOTCHECKED     2 +#define BRW_DEPENDENCY_DISABLE        3 + +#define BRW_EXECUTE_1     0 +#define BRW_EXECUTE_2     1 +#define BRW_EXECUTE_4     2 +#define BRW_EXECUTE_8     3 +#define BRW_EXECUTE_16    4 +#define BRW_EXECUTE_32    5 + +#define BRW_HORIZONTAL_STRIDE_0   0 +#define BRW_HORIZONTAL_STRIDE_1   1 +#define BRW_HORIZONTAL_STRIDE_2   2 +#define BRW_HORIZONTAL_STRIDE_4   3 + +#define BRW_INSTRUCTION_NORMAL    0 +#define BRW_INSTRUCTION_SATURATE  1 + +#define BRW_MASK_ENABLE   0 +#define BRW_MASK_DISABLE  1 + +#define BRW_OPCODE_MOV        1 +#define BRW_OPCODE_SEL        2 +#define BRW_OPCODE_NOT        4 +#define BRW_OPCODE_AND        5 +#define BRW_OPCODE_OR         6 +#define BRW_OPCODE_XOR        7 +#define BRW_OPCODE_SHR        8 +#define BRW_OPCODE_SHL        9 +#define BRW_OPCODE_RSR        10 +#define BRW_OPCODE_RSL        11 +#define BRW_OPCODE_ASR        12 +#define BRW_OPCODE_CMP        16 +#define BRW_OPCODE_JMPI       32 +#define BRW_OPCODE_IF         34 +#define BRW_OPCODE_IFF        35 +#define BRW_OPCODE_ELSE       36 +#define BRW_OPCODE_ENDIF      37 +#define BRW_OPCODE_DO         38 +#define BRW_OPCODE_WHILE      39 +#define BRW_OPCODE_BREAK   
   40 +#define BRW_OPCODE_CONTINUE   41 +#define BRW_OPCODE_HALT       42 +#define BRW_OPCODE_MSAVE      44 +#define BRW_OPCODE_MRESTORE   45 +#define BRW_OPCODE_PUSH       46 +#define BRW_OPCODE_POP        47 +#define BRW_OPCODE_WAIT       48 +#define BRW_OPCODE_SEND       49 +#define BRW_OPCODE_ADD        64 +#define BRW_OPCODE_MUL        65 +#define BRW_OPCODE_AVG        66 +#define BRW_OPCODE_FRC        67 +#define BRW_OPCODE_RNDU       68 +#define BRW_OPCODE_RNDD       69 +#define BRW_OPCODE_RNDE       70 +#define BRW_OPCODE_RNDZ       71 +#define BRW_OPCODE_MAC        72 +#define BRW_OPCODE_MACH       73 +#define BRW_OPCODE_LZD        74 +#define BRW_OPCODE_SAD2       80 +#define BRW_OPCODE_SADA2      81 +#define BRW_OPCODE_DP4        84 +#define BRW_OPCODE_DPH        85 +#define BRW_OPCODE_DP3        86 +#define BRW_OPCODE_DP2        87 +#define BRW_OPCODE_DPA2       88 +#define BRW_OPCODE_LINE       89 +#define BRW_OPCODE_NOP        126 + +#define BRW_PREDICATE_NONE             0 +#define BRW_PREDICATE_NORMAL           1 +#define BRW_PREDICATE_ALIGN1_ANYV             2 +#define BRW_PREDICATE_ALIGN1_ALLV             3 +#define BRW_PREDICATE_ALIGN1_ANY2H            4 +#define BRW_PREDICATE_ALIGN1_ALL2H            5 +#define BRW_PREDICATE_ALIGN1_ANY4H            6 +#define BRW_PREDICATE_ALIGN1_ALL4H            7 +#define BRW_PREDICATE_ALIGN1_ANY8H            8 +#define BRW_PREDICATE_ALIGN1_ALL8H            9 +#define BRW_PREDICATE_ALIGN1_ANY16H           10 +#define BRW_PREDICATE_ALIGN1_ALL16H           11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5 +#define BRW_PREDICATE_ALIGN16_ANY4H           6 +#define BRW_PREDICATE_ALIGN16_ALL4H           7 + +#define BRW_ARCHITECTURE_REGISTER_FILE    0 +#define BRW_GENERAL_REGISTER_FILE         1 +#define BRW_MESSAGE_REGISTER_FILE         2 +#define BRW_IMMEDIATE_VALUE               3 + +#define BRW_REGISTER_TYPE_UD  0 +#define BRW_REGISTER_TYPE_D   1 +#define BRW_REGISTER_TYPE_UW  2 +#define BRW_REGISTER_TYPE_W   3 +#define BRW_REGISTER_TYPE_UB  4 +#define BRW_REGISTER_TYPE_B   5 +#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? 
*/ +#define BRW_REGISTER_TYPE_HF  6 +#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F   7 + +#define BRW_ARF_NULL                  0x00 +#define BRW_ARF_ADDRESS               0x10 +#define BRW_ARF_ACCUMULATOR           0x20 +#define BRW_ARF_FLAG                  0x30 +#define BRW_ARF_MASK                  0x40 +#define BRW_ARF_MASK_STACK            0x50 +#define BRW_ARF_MASK_STACK_DEPTH      0x60 +#define BRW_ARF_STATE                 0x70 +#define BRW_ARF_CONTROL               0x80 +#define BRW_ARF_NOTIFICATION_COUNT    0x90 +#define BRW_ARF_IP                    0xA0 + +#define BRW_AMASK   0 +#define BRW_IMASK   1 +#define BRW_LMASK   2 +#define BRW_CMASK   3 + + + +#define BRW_THREAD_NORMAL     0 +#define BRW_THREAD_ATOMIC     1 +#define BRW_THREAD_SWITCH     2 + +#define BRW_VERTICAL_STRIDE_0                 0 +#define BRW_VERTICAL_STRIDE_1                 1 +#define BRW_VERTICAL_STRIDE_2                 2 +#define BRW_VERTICAL_STRIDE_4                 3 +#define BRW_VERTICAL_STRIDE_8                 4 +#define BRW_VERTICAL_STRIDE_16                5 +#define BRW_VERTICAL_STRIDE_32                6 +#define BRW_VERTICAL_STRIDE_64                7 +#define BRW_VERTICAL_STRIDE_128               8 +#define BRW_VERTICAL_STRIDE_256               9 +#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF + +#define BRW_WIDTH_1       0 +#define BRW_WIDTH_2       1 +#define BRW_WIDTH_4       2 +#define BRW_WIDTH_8       3 +#define BRW_WIDTH_16      4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11 + +#define BRW_POLYGON_FACING_FRONT      0 +#define BRW_POLYGON_FACING_BACK       1 + +#define BRW_MESSAGE_TARGET_NULL               0 +#define BRW_MESSAGE_TARGET_MATH               1 +#define BRW_MESSAGE_TARGET_SAMPLER            2 +#define BRW_MESSAGE_TARGET_GATEWAY            3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ      4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5 +#define BRW_MESSAGE_TARGET_URB                6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3 
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3 + +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ          2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE                2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7 + +#define BRW_MATH_FUNCTION_INV                              1 +#define BRW_MATH_FUNCTION_LOG                              2 +#define BRW_MATH_FUNCTION_EXP                              3 +#define BRW_MATH_FUNCTION_SQRT                             4 +#define BRW_MATH_FUNCTION_RSQ                              5 +#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN                              9 +#define BRW_MATH_FUNCTION_POW                              10 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13 + +#define BRW_MATH_INTEGER_UNSIGNED     0 +#define BRW_MATH_INTEGER_SIGNED       1 + +#define BRW_MATH_PRECISION_FULL        0 +#define BRW_MATH_PRECISION_PARTIAL     1 + +#define BRW_MATH_SATURATE_NONE         0 +#define BRW_MATH_SATURATE_SATURATE     1 + +#define BRW_MATH_DATA_VECTOR  0 +#define BRW_MATH_DATA_SCALAR  1 + +#define BRW_URB_OPCODE_WRITE  0 + +#define BRW_URB_SWIZZLE_NONE          0 +#define BRW_URB_SWIZZLE_INTERLEAVE    1 +#define BRW_URB_SWIZZLE_TRANSPOSE     2 + +#define BRW_SCRATCH_SPACE_SIZE_1K     0 +#define BRW_SCRATCH_SPACE_SIZE_2K     1 +#define BRW_SCRATCH_SPACE_SIZE_4K     2 +#define BRW_SCRATCH_SPACE_SIZE_8K     3 +#define BRW_SCRATCH_SPACE_SIZE_16K    4 +#define BRW_SCRATCH_SPACE_SIZE_32K    5 +#define BRW_SCRATCH_SPACE_SIZE_64K    6 +#define BRW_SCRATCH_SPACE_SIZE_128K   
7 +#define BRW_SCRATCH_SPACE_SIZE_256K   8 +#define BRW_SCRATCH_SPACE_SIZE_512K   9 +#define BRW_SCRATCH_SPACE_SIZE_1M     10 +#define BRW_SCRATCH_SPACE_SIZE_2M     11 + + + + +#define CMD_URB_FENCE                 0x6000 +#define CMD_CONST_BUFFER_STATE        0x6001 +#define CMD_CONST_BUFFER              0x6002 + +#define CMD_STATE_BASE_ADDRESS        0x6101 +#define CMD_STATE_INSN_POINTER        0x6102 +#define CMD_PIPELINE_SELECT           0x6104 + +#define CMD_PIPELINED_STATE_POINTERS  0x7800 +#define CMD_BINDING_TABLE_PTRS        0x7801 +#define CMD_VERTEX_BUFFER             0x7808 +#define CMD_VERTEX_ELEMENT            0x7809 +#define CMD_INDEX_BUFFER              0x780a +#define CMD_VF_STATISTICS             0x780b + +#define CMD_DRAW_RECT                 0x7900 +#define CMD_BLEND_CONSTANT_COLOR      0x7901 +#define CMD_CHROMA_KEY                0x7904 +#define CMD_DEPTH_BUFFER              0x7905 +#define CMD_POLY_STIPPLE_OFFSET       0x7906 +#define CMD_POLY_STIPPLE_PATTERN      0x7907 +#define CMD_LINE_STIPPLE_PATTERN      0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 + +#define CMD_PIPE_CONTROL              0x7a00 + +#define CMD_3D_PRIM                   0x7b00 + +#define CMD_MI_FLUSH                  0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END    0x1 +#define R02_PRIM_START  0x2 + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c new file mode 100644 index 0000000000..648aaa0da5 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + +#include "pipe/p_context.h" +#include "pipe/internal/p_winsys_screen.h" + +static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { +   _3DPRIM_POINTLIST, +   _3DPRIM_LINELIST, +   _3DPRIM_LINELOOP, +   _3DPRIM_LINESTRIP, +   _3DPRIM_TRILIST, +   _3DPRIM_TRISTRIP, +   _3DPRIM_TRIFAN, +   _3DPRIM_QUADLIST, +   _3DPRIM_QUADSTRIP, +   _3DPRIM_POLYGON +}; + + +static const int reduced_prim[PIPE_PRIM_POLYGON+1] = { +   PIPE_PRIM_POINTS, +   PIPE_PRIM_LINES, +   PIPE_PRIM_LINES, +   PIPE_PRIM_LINES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES +}; + + +/* When the primitive changes, set a state bit and re-validate.  Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities).  That may not be realistic however. + */ +static void brw_set_prim(struct brw_context *brw, int prim) +{ +   PRINT("PRIM: %d\n", prim); + +   /* Slight optimization to avoid the GS program when not needed: +    */ +   if (prim == PIPE_PRIM_QUAD_STRIP && +       brw->attribs.Raster->flatshade && +       brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL && +       brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL) +      prim = PIPE_PRIM_TRIANGLE_STRIP; + +   if (prim != brw->primitive) { +      brw->primitive = prim; +      brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + +      if (reduced_prim[prim] != brw->reduced_primitive) { +	 brw->reduced_primitive = reduced_prim[prim]; +	 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; +      } + +      brw_validate_state(brw); +   } + +} + + +static unsigned trim(int prim, unsigned length) +{ +   if (prim == PIPE_PRIM_QUAD_STRIP) +      return length > 3 ? (length - length % 2) : 0; +   else if (prim == PIPE_PRIM_QUADS) +      return length - length % 4; +   else +      return length; +} + + + +static boolean brw_emit_prim( struct brw_context *brw, +			      boolean indexed, +			      unsigned start, +			      unsigned count ) + +{ +   struct brw_3d_primitive prim_packet; + +   if (BRW_DEBUG & DEBUG_PRIMS) +      PRINT("PRIM: %d %d %d\n",  brw->primitive, start, count); + +   prim_packet.header.opcode = CMD_3D_PRIM; +   prim_packet.header.length = sizeof(prim_packet)/4 - 2; +   prim_packet.header.pad = 0; +   prim_packet.header.topology = hw_prim[brw->primitive]; +   prim_packet.header.indexed = indexed; + +   prim_packet.verts_per_instance = trim(brw->primitive, count); +   prim_packet.start_vert_location = start; +   prim_packet.instance_count = 1; +   prim_packet.start_instance_location = 0; +   prim_packet.base_vert_location = 0; + +   if (prim_packet.verts_per_instance == 0) +      return TRUE; + +   return brw_batchbuffer_data( brw->winsys, +                                &prim_packet, +                                sizeof(prim_packet) ); +} + + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. 
+ */ +static boolean brw_try_draw_elements( struct pipe_context *pipe, +				      struct pipe_buffer *index_buffer, +				      unsigned index_size, +				      unsigned mode, +				      unsigned start, +				      unsigned count ) +{ +   struct brw_context *brw = brw_context(pipe); + +   /* Set the first primitive ahead of validate_state: +    */ +   brw_set_prim(brw, mode); + +   /* Upload index, vertex data: +    */ +   if (index_buffer && +       !brw_upload_indices( brw, index_buffer, index_size, start, count )) +      return FALSE; + +   if (!brw_upload_vertex_buffers(brw)) +      return FALSE; + +   if (!brw_upload_vertex_elements( brw )) +      return FALSE; + +   /* XXX:  Need to separate validate and upload of state. +    */ +   if (brw->state.dirty.brw) +      brw_validate_state( brw ); + +   if (!brw_emit_prim(brw, index_buffer != NULL, +                      start, count)) +      return FALSE; + +   return TRUE; +} + + + +static boolean brw_draw_elements( struct pipe_context *pipe, +				  struct pipe_buffer *indexBuffer, +				  unsigned indexSize, +				  unsigned mode, +				  unsigned start, +				  unsigned count ) +{ +   if (!brw_try_draw_elements( pipe, +			       indexBuffer, +			       indexSize, +			       mode, start, count )) +   { +      /* flush ? */ + +      if (!brw_try_draw_elements( pipe, +				  indexBuffer, +				  indexSize, +				  mode, start, +				  count )) { +	 assert(0); +	 return FALSE; +      } +   } + +   return TRUE; +} + + + +static boolean brw_draw_arrays( struct pipe_context *pipe, +				    unsigned mode, +				    unsigned start, +				    unsigned count ) +{ +   if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { +      /* flush ? */ + +      if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { +	 assert(0); +	 return FALSE; +      } +   } +    +   return TRUE; +} + + + +void brw_init_draw_functions( struct brw_context *brw ) +{ +   brw->pipe.draw_arrays = brw_draw_arrays; +   brw->pipe.draw_elements = brw_draw_elements; +} + + diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/i965simple/brw_draw.h new file mode 100644 index 0000000000..62fe0d5d0e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.h @@ -0,0 +1,55 @@ + /************************************************************************** + *  + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "pipe/p_context.h" + +struct brw_context; + + + +void brw_init_draw_functions( struct brw_context *brw ); + + +boolean brw_upload_vertices( struct brw_context *brw, +			       unsigned min_index, +			       unsigned max_index ); + +boolean brw_upload_indices(struct brw_context *brw, +                           const struct pipe_buffer *index_buffer, +                           int ib_size, int start, int count); + +boolean brw_upload_vertex_buffers( struct brw_context *brw ); +boolean brw_upload_vertex_elements( struct brw_context *brw ); + +unsigned brw_translate_surface_format( unsigned id ); + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c new file mode 100644 index 0000000000..2d9ca3f2ea --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + + +struct brw_array_state { +   union header_union header; + +   struct { +      union { +	 struct { +	    unsigned pitch:11; +	    unsigned pad:15; +	    unsigned access_type:1; +	    unsigned vb_index:5; +	 } bits; +	 unsigned dword; +      } vb0; + +      struct pipe_buffer *buffer; +      unsigned offset; + +      unsigned max_index; +      unsigned instance_data_step_rate; + +   } vb[BRW_VBP_MAX]; +}; + + + +unsigned brw_translate_surface_format( unsigned id ) +{ +   switch (id) { +   case PIPE_FORMAT_R64_FLOAT: +      return BRW_SURFACEFORMAT_R64_FLOAT; +   case PIPE_FORMAT_R64G64_FLOAT: +      return BRW_SURFACEFORMAT_R64G64_FLOAT; +   case PIPE_FORMAT_R64G64B64_FLOAT: +      return BRW_SURFACEFORMAT_R64G64B64_FLOAT; +   case PIPE_FORMAT_R64G64B64A64_FLOAT: +      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + +   case PIPE_FORMAT_R32_FLOAT: +      return BRW_SURFACEFORMAT_R32_FLOAT; +   case PIPE_FORMAT_R32G32_FLOAT: +      return BRW_SURFACEFORMAT_R32G32_FLOAT; +   case PIPE_FORMAT_R32G32B32_FLOAT: +      return BRW_SURFACEFORMAT_R32G32B32_FLOAT; +   case PIPE_FORMAT_R32G32B32A32_FLOAT: +      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + +   case PIPE_FORMAT_R32_UNORM: +      return BRW_SURFACEFORMAT_R32_UNORM; +   case PIPE_FORMAT_R32G32_UNORM: +      return BRW_SURFACEFORMAT_R32G32_UNORM; +   case PIPE_FORMAT_R32G32B32_UNORM: +      return BRW_SURFACEFORMAT_R32G32B32_UNORM; +   case PIPE_FORMAT_R32G32B32A32_UNORM: +      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + +   case PIPE_FORMAT_R32_USCALED: +      return BRW_SURFACEFORMAT_R32_USCALED; +   case PIPE_FORMAT_R32G32_USCALED: +      return BRW_SURFACEFORMAT_R32G32_USCALED; +   case PIPE_FORMAT_R32G32B32_USCALED: +      return BRW_SURFACEFORMAT_R32G32B32_USCALED; +   case PIPE_FORMAT_R32G32B32A32_USCALED: +      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + +   case PIPE_FORMAT_R32_SNORM: +      return BRW_SURFACEFORMAT_R32_SNORM; +   case PIPE_FORMAT_R32G32_SNORM: +      return BRW_SURFACEFORMAT_R32G32_SNORM; +   case PIPE_FORMAT_R32G32B32_SNORM: +      return BRW_SURFACEFORMAT_R32G32B32_SNORM; +   case PIPE_FORMAT_R32G32B32A32_SNORM: +      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + +   case PIPE_FORMAT_R32_SSCALED: +      return BRW_SURFACEFORMAT_R32_SSCALED; +   case PIPE_FORMAT_R32G32_SSCALED: +      return BRW_SURFACEFORMAT_R32G32_SSCALED; +   case PIPE_FORMAT_R32G32B32_SSCALED: +      return BRW_SURFACEFORMAT_R32G32B32_SSCALED; +   case PIPE_FORMAT_R32G32B32A32_SSCALED: +      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + +   case PIPE_FORMAT_R16_UNORM: +      return BRW_SURFACEFORMAT_R16_UNORM; +   case PIPE_FORMAT_R16G16_UNORM: +      return BRW_SURFACEFORMAT_R16G16_UNORM; +   case PIPE_FORMAT_R16G16B16_UNORM: +      return BRW_SURFACEFORMAT_R16G16B16_UNORM; +   case PIPE_FORMAT_R16G16B16A16_UNORM: +      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + +   case PIPE_FORMAT_R16_USCALED: +      return BRW_SURFACEFORMAT_R16_USCALED; +   case PIPE_FORMAT_R16G16_USCALED: +      return BRW_SURFACEFORMAT_R16G16_USCALED; +   case PIPE_FORMAT_R16G16B16_USCALED: +      return BRW_SURFACEFORMAT_R16G16B16_USCALED; +   case PIPE_FORMAT_R16G16B16A16_USCALED: +      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + +   case PIPE_FORMAT_R16_SNORM: +      return BRW_SURFACEFORMAT_R16_SNORM; +   
case PIPE_FORMAT_R16G16_SNORM: +      return BRW_SURFACEFORMAT_R16G16_SNORM; +   case PIPE_FORMAT_R16G16B16_SNORM: +      return BRW_SURFACEFORMAT_R16G16B16_SNORM; +   case PIPE_FORMAT_R16G16B16A16_SNORM: +      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + +   case PIPE_FORMAT_R16_SSCALED: +      return BRW_SURFACEFORMAT_R16_SSCALED; +   case PIPE_FORMAT_R16G16_SSCALED: +      return BRW_SURFACEFORMAT_R16G16_SSCALED; +   case PIPE_FORMAT_R16G16B16_SSCALED: +      return BRW_SURFACEFORMAT_R16G16B16_SSCALED; +   case PIPE_FORMAT_R16G16B16A16_SSCALED: +      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + +   case PIPE_FORMAT_R8_UNORM: +      return BRW_SURFACEFORMAT_R8_UNORM; +   case PIPE_FORMAT_R8G8_UNORM: +      return BRW_SURFACEFORMAT_R8G8_UNORM; +   case PIPE_FORMAT_R8G8B8_UNORM: +      return BRW_SURFACEFORMAT_R8G8B8_UNORM; +   case PIPE_FORMAT_R8G8B8A8_UNORM: +      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + +   case PIPE_FORMAT_R8_USCALED: +      return BRW_SURFACEFORMAT_R8_USCALED; +   case PIPE_FORMAT_R8G8_USCALED: +      return BRW_SURFACEFORMAT_R8G8_USCALED; +   case PIPE_FORMAT_R8G8B8_USCALED: +      return BRW_SURFACEFORMAT_R8G8B8_USCALED; +   case PIPE_FORMAT_R8G8B8A8_USCALED: +      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + +   case PIPE_FORMAT_R8_SNORM: +      return BRW_SURFACEFORMAT_R8_SNORM; +   case PIPE_FORMAT_R8G8_SNORM: +      return BRW_SURFACEFORMAT_R8G8_SNORM; +   case PIPE_FORMAT_R8G8B8_SNORM: +      return BRW_SURFACEFORMAT_R8G8B8_SNORM; +   case PIPE_FORMAT_R8G8B8A8_SNORM: +      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + +   case PIPE_FORMAT_R8_SSCALED: +      return BRW_SURFACEFORMAT_R8_SSCALED; +   case PIPE_FORMAT_R8G8_SSCALED: +      return BRW_SURFACEFORMAT_R8G8_SSCALED; +   case PIPE_FORMAT_R8G8B8_SSCALED: +      return BRW_SURFACEFORMAT_R8G8B8_SSCALED; +   case PIPE_FORMAT_R8G8B8A8_SSCALED: +      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + +   default: +      assert(0); +      return 0; +   } +} + +static unsigned get_index_type(int type) +{ +   switch (type) { +   case 1: return BRW_INDEX_BYTE; +   case 2: return BRW_INDEX_WORD; +   case 4: return BRW_INDEX_DWORD; +   default: assert(0); return 0; +   } +} + + +boolean brw_upload_vertex_buffers( struct brw_context *brw ) +{ +   struct brw_array_state vbp; +   unsigned nr_enabled = 0; +   unsigned i; + +   memset(&vbp, 0, sizeof(vbp)); + +   /* This is a hardware limit: +    */ + +   for (i = 0; i < BRW_VEP_MAX; i++) +   { +      if (brw->vb.vbo_array[i] == NULL) { +	 nr_enabled = i; +	 break; +      } + +      vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride; +      vbp.vb[i].vb0.bits.pad = 0; +      vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; +      vbp.vb[i].vb0.bits.vb_index = i; +      vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; +      vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; +      vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; +   } + + +   vbp.header.bits.length = (1 + nr_enabled * 4) - 2; +   vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + +   BEGIN_BATCH(vbp.header.bits.length+2, 0); +   OUT_BATCH( vbp.header.dword ); + +   for (i = 0; i < nr_enabled; i++) { +      OUT_BATCH( vbp.vb[i].vb0.dword ); +      OUT_RELOC( vbp.vb[i].buffer,  PIPE_BUFFER_USAGE_GPU_READ, +		 vbp.vb[i].offset); +      OUT_BATCH( vbp.vb[i].max_index ); +      OUT_BATCH( vbp.vb[i].instance_data_step_rate ); +   } +   ADVANCE_BATCH(); +   return TRUE; +} + + + +boolean brw_upload_vertex_elements( struct brw_context *brw ) +{ +   struct 
brw_vertex_element_packet vep; + +   unsigned i; +   unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs; + +   memset(&vep, 0, sizeof(vep)); + +   for (i = 0; i < nr_enabled; i++)  +      vep.ve[i] = brw->vb.inputs[i]; + + +   vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; +   vep.header.opcode = CMD_VERTEX_ELEMENT; +   brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); + +   return TRUE; +} + +boolean brw_upload_indices( struct brw_context *brw, +                            const struct pipe_buffer *index_buffer, +                            int ib_size, int start, int count) +{ +   /* Emit the indexbuffer packet: +    */ +   { +      struct brw_indexbuffer ib; + +      memset(&ib, 0, sizeof(ib)); + +      ib.header.bits.opcode = CMD_INDEX_BUFFER; +      ib.header.bits.length = sizeof(ib)/4 - 2; +      ib.header.bits.index_format = get_index_type(ib_size); +      ib.header.bits.cut_index_enable = 0; + + +      BEGIN_BATCH(4, 0); +      OUT_BATCH( ib.header.dword ); +      OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start); +      OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count); +      OUT_BATCH( 0 ); +      ADVANCE_BATCH(); +   } +   return TRUE; +} diff --git a/src/gallium/drivers/i965simple/brw_eu.c b/src/gallium/drivers/i965simple/brw_eu.c new file mode 100644 index 0000000000..e2002d1821 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.c @@ -0,0 +1,130 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +   + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8?  Do I + * need to test/set for 0xffff when execution_size is 16? 
+ */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ +   p->current->header.predicate_control = BRW_PREDICATE_NONE; + +   if (value != 0xff) { +      if (value != p->flag_value) { +	 brw_push_insn_state(p); +	 brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); +	 p->flag_value = value; +	 brw_pop_insn_state(p); +      } + +      p->current->header.predicate_control = BRW_PREDICATE_NORMAL; +   }    +} + +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ +   p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ +   p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) +{ +   p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, boolean compression_control ) +{ +   p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, unsigned value ) +{ +   p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, unsigned value ) +{ +   p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ +   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); +   memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); +   p->current++;    +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ +   assert(p->current != p->stack); +   p->current--; +} + + +/*********************************************************************** + */ +void brw_init_compile( struct brw_compile *p ) +{ +   p->nr_insn = 0; +   p->current = p->stack; +   memset(p->current, 0, sizeof(p->current[0])); + +   /* Some defaults? +    */ +   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ +   brw_set_saturate(p, 0); +   brw_set_compression_control(p, BRW_COMPRESSION_NONE); +   brw_set_predicate_control_flag_value(p, 0xff);  +} + + +const unsigned *brw_get_program( struct brw_compile *p, +			       unsigned *sz ) +{ +   unsigned i; + +   for (i = 0; i < 8; i++) +      brw_NOP(p); + +   *sz = p->nr_insn * sizeof(struct brw_instruction); +   return (const unsigned *)p->store; +} + diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h new file mode 100644 index 0000000000..23151ae9ed --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.h @@ -0,0 +1,888 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "brw_structs.h" +#include "brw_defines.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1) + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges.  Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ +   unsigned type:4; +   unsigned file:2; +   unsigned nr:8; +   unsigned subnr:5;		/* :1 in align16 */ +   unsigned negate:1;		/* source only */ +   unsigned abs:1;		/* source only */ +   unsigned vstride:4;		/* source only */ +   unsigned width:3;		/* src only, align1 only */ +   unsigned hstride:2;   		/* src only, align1 only */ +   unsigned address_mode:1;	/* relative addressing, hopefully! */ +   unsigned pad0:1; + +   union { +      struct { +	 unsigned swizzle:8;		/* src only, align16 only */ +	 unsigned writemask:4;		/* dest only, align16 only */ +	 int  indirect_offset:10;	/* relative addressing offset */ +	 unsigned pad1:10;		/* two dwords total */ +      } bits; + +      float f; +      int   d; +      unsigned ud; +   } dw1; +}; + + +struct brw_indirect { +   unsigned addr_subnr:4; +   int addr_offset:10; +   unsigned pad:18; +}; + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 1200 + +struct brw_compile { +   struct brw_instruction store[BRW_EU_MAX_INSN]; +   unsigned nr_insn; + +   /* Allow clients to push/pop instruction state: +    */ +   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; +   struct brw_instruction *current; + +   unsigned flag_value; +   boolean single_program_flow; +}; + + + +static __inline int type_sz( unsigned type ) +{ +   switch( type ) { +   case BRW_REGISTER_TYPE_UD: +   case BRW_REGISTER_TYPE_D: +   case BRW_REGISTER_TYPE_F: +      return 4; +   case BRW_REGISTER_TYPE_HF: +   case BRW_REGISTER_TYPE_UW: +   case BRW_REGISTER_TYPE_W: +      return 2; +   case BRW_REGISTER_TYPE_UB: +   case BRW_REGISTER_TYPE_B: +      return 1; +   default: +      return 0; +   } +} + +static __inline struct brw_reg brw_reg( unsigned file, +					unsigned nr, +					unsigned subnr, +					unsigned type, +					unsigned vstride, +					unsigned width, +					unsigned hstride, +					unsigned swizzle, +					unsigned writemask) +{ + +   struct brw_reg reg; +   reg.type = type; +   reg.file = file; +   reg.nr = nr; +   reg.subnr = subnr * type_sz(type); +   reg.negate = 0; +   reg.abs = 0; +   reg.vstride = vstride; +   reg.width = width; +   reg.hstride = hstride; +   reg.address_mode = BRW_ADDRESS_DIRECT; +   reg.pad0 = 0; + +   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to +    * set swizzle and writemask 
to W, as the lower bits of subnr will +    * be lost when converted to align16.  This is probably too much to +    * keep track of as you'd want it adjusted by suboffset(), etc. +    * Perhaps fix up when converting to align16? +    */ +   reg.dw1.bits.swizzle = swizzle; +   reg.dw1.bits.writemask = writemask; +   reg.dw1.bits.indirect_offset = 0; +   reg.dw1.bits.pad1 = 0; +   return reg; +} + +static __inline struct brw_reg brw_vec16_reg( unsigned file, +					      unsigned nr, +					      unsigned subnr ) +{ +   return brw_reg(file, +		  nr, +		  subnr, +		  BRW_REGISTER_TYPE_F, +		  BRW_VERTICAL_STRIDE_16, +		  BRW_WIDTH_16, +		  BRW_HORIZONTAL_STRIDE_1, +		  BRW_SWIZZLE_XYZW, +		  TGSI_WRITEMASK_XYZW); +} + +static __inline struct brw_reg brw_vec8_reg( unsigned file, +					     unsigned nr, +					     unsigned subnr ) +{ +   return brw_reg(file, +		  nr, +		  subnr, +		  BRW_REGISTER_TYPE_F, +		  BRW_VERTICAL_STRIDE_8, +		  BRW_WIDTH_8, +		  BRW_HORIZONTAL_STRIDE_1, +		  BRW_SWIZZLE_XYZW, +		  TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec4_reg( unsigned file, +					      unsigned nr, +					      unsigned subnr ) +{ +   return brw_reg(file, +		  nr, +		  subnr, +		  BRW_REGISTER_TYPE_F, +		  BRW_VERTICAL_STRIDE_4, +		  BRW_WIDTH_4, +		  BRW_HORIZONTAL_STRIDE_1, +		  BRW_SWIZZLE_XYZW, +		  TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec2_reg( unsigned file, +					      unsigned nr, +					      unsigned subnr ) +{ +   return brw_reg(file, +		  nr, +		  subnr, +		  BRW_REGISTER_TYPE_F, +		  BRW_VERTICAL_STRIDE_2, +		  BRW_WIDTH_2, +		  BRW_HORIZONTAL_STRIDE_1, +		  BRW_SWIZZLE_XYXY, +		  TGSI_WRITEMASK_XY); +} + +static __inline struct brw_reg brw_vec1_reg( unsigned file, +					     unsigned nr, +					     unsigned subnr ) +{ +   return brw_reg(file, +		  nr, +		  subnr, +		  BRW_REGISTER_TYPE_F, +		  BRW_VERTICAL_STRIDE_0, +		  BRW_WIDTH_1, +		  BRW_HORIZONTAL_STRIDE_0, +		  BRW_SWIZZLE_XXXX, +		  TGSI_WRITEMASK_X); +} + + +static __inline struct brw_reg retype( struct brw_reg reg, +				       unsigned type ) +{ +   reg.type = type; +   return reg; +} + +static __inline struct brw_reg suboffset( struct brw_reg reg, +					  unsigned delta ) +{ +   reg.subnr += delta * type_sz(reg.type); +   return reg; +} + + +static __inline struct brw_reg offset( struct brw_reg reg, +				       unsigned delta ) +{ +   reg.nr += delta; +   return reg; +} + + +static __inline struct brw_reg byte_offset( struct brw_reg reg, +					    unsigned bytes ) +{ +   unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; +   reg.nr = newoffset / REG_SIZE; +   reg.subnr = newoffset % REG_SIZE; +   return reg; +} + + +static __inline struct brw_reg brw_uw16_reg( unsigned file, +					     unsigned nr, +					     unsigned subnr ) +{ +   return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw8_reg( unsigned file, +					    unsigned nr, +					    unsigned subnr ) +{ +   return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw1_reg( unsigned file, +					    unsigned nr, +					    unsigned subnr ) +{ +   return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_imm_reg( unsigned type ) +{ +   return brw_reg( BRW_IMMEDIATE_VALUE, +		   0, +		   0, +		   type, +		   BRW_VERTICAL_STRIDE_0, +		   BRW_WIDTH_1, +		   BRW_HORIZONTAL_STRIDE_0, +		   0, +		   0); +} + +static 
__inline struct brw_reg brw_imm_f( float f ) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); +   imm.dw1.f = f; +   return imm; +} + +static __inline struct brw_reg brw_imm_d( int d ) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); +   imm.dw1.d = d; +   return imm; +} + +static __inline struct brw_reg brw_imm_ud( unsigned ud ) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); +   imm.dw1.ud = ud; +   return imm; +} + +static __inline struct brw_reg brw_imm_uw( ushort uw ) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); +   imm.dw1.ud = uw; +   return imm; +} + +static __inline struct brw_reg brw_imm_w( short w ) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); +   imm.dw1.d = w; +   return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/* Vector of eight signed half-byte values: + */ +static __inline struct brw_reg brw_imm_v( unsigned v ) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); +   imm.vstride = BRW_VERTICAL_STRIDE_0; +   imm.width = BRW_WIDTH_8; +   imm.hstride = BRW_HORIZONTAL_STRIDE_1; +   imm.dw1.ud = v; +   return imm; +} + +/* Vector of four 8-bit float values: + */ +static __inline struct brw_reg brw_imm_vf( unsigned v ) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); +   imm.vstride = BRW_VERTICAL_STRIDE_0; +   imm.width = BRW_WIDTH_4; +   imm.hstride = BRW_HORIZONTAL_STRIDE_1; +   imm.dw1.ud = v; +   return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE  0x30 +#define VF_NEG  (1<<7) + +static __inline struct brw_reg brw_imm_vf4( unsigned v0, +					    unsigned v1, +					    unsigned v2, +					    unsigned v3) +{ +   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); +   imm.vstride = BRW_VERTICAL_STRIDE_0; +   imm.width = BRW_WIDTH_4; +   imm.hstride = BRW_HORIZONTAL_STRIDE_1; +   imm.dw1.ud = ((v0 << 0) | +		 (v1 << 8) | +		 (v2 << 16) | +		 (v3 << 24)); +   return imm; +} + + +static __inline struct brw_reg brw_address( struct brw_reg reg ) +{ +   return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + + +static __inline struct brw_reg brw_vec1_grf( unsigned nr, +					       unsigned subnr ) +{ +   return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec8_grf( unsigned nr, +					     unsigned subnr ) +{ +   return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec4_grf( unsigned nr, +					     unsigned subnr ) +{ +   return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static __inline struct brw_reg brw_vec2_grf( unsigned nr, +					     unsigned subnr ) +{ +   return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_uw8_grf( unsigned nr, +					    unsigned subnr ) +{ +   return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_null_reg( void ) +{ +   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, +		       BRW_ARF_NULL, +		       0); +} + +static __inline struct brw_reg brw_address_reg( unsigned subnr ) +{ +   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, +		      BRW_ARF_ADDRESS, +		      subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw.  
This goes against the convention for other scalar + * regs: + */ +static __inline struct brw_reg brw_ip_reg( void ) +{ +   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, +		  BRW_ARF_IP, +		  0, +		  BRW_REGISTER_TYPE_UD, +		  BRW_VERTICAL_STRIDE_4, /* ? */ +		  BRW_WIDTH_1, +		  BRW_HORIZONTAL_STRIDE_0, +		  BRW_SWIZZLE_XYZW, /* NOTE! */ +		  TGSI_WRITEMASK_XYZW); /* NOTE! */ +} + +static __inline struct brw_reg brw_acc_reg( void ) +{ +   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, +		       BRW_ARF_ACCUMULATOR, +		       0); +} + + +static __inline struct brw_reg brw_flag_reg( void ) +{ +   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, +		      BRW_ARF_FLAG, +		      0); +} + + +static __inline struct brw_reg brw_mask_reg( unsigned subnr ) +{ +   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, +		      BRW_ARF_MASK, +		      subnr); +} + +static __inline struct brw_reg brw_message_reg( unsigned nr ) +{ +   return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, +		       nr, +		       0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static __inline unsigned cvt( unsigned val ) +{ +   switch (val) { +   case 0: return 0; +   case 1: return 1; +   case 2: return 2; +   case 4: return 3; +   case 8: return 4; +   case 16: return 5; +   case 32: return 6; +   } +   return 0; +} + +static __inline struct brw_reg stride( struct brw_reg reg, +				       unsigned vstride, +				       unsigned width, +				       unsigned hstride ) +{ + +   reg.vstride = cvt(vstride); +   reg.width = cvt(width) - 1; +   reg.hstride = cvt(hstride); +   return reg; +} + +static __inline struct brw_reg vec16( struct brw_reg reg ) +{ +   return stride(reg, 16,16,1); +} + +static __inline struct brw_reg vec8( struct brw_reg reg ) +{ +   return stride(reg, 8,8,1); +} + +static __inline struct brw_reg vec4( struct brw_reg reg ) +{ +   return stride(reg, 4,4,1); +} + +static __inline struct brw_reg vec2( struct brw_reg reg ) +{ +   return stride(reg, 2,2,1); +} + +static __inline struct brw_reg vec1( struct brw_reg reg ) +{ +   return stride(reg, 0,1,0); +} + +static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt ) +{ +   return vec1(suboffset(reg, elt)); +} + +static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt ) +{ +   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static __inline struct brw_reg brw_swizzle( struct brw_reg reg, +					    unsigned x, +					    unsigned y, +					    unsigned z, +					    unsigned w) +{ +   reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), +				       BRW_GET_SWZ(reg.dw1.bits.swizzle, y), +				       BRW_GET_SWZ(reg.dw1.bits.swizzle, z), +				       BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); +   return reg; +} + + +static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, +					     unsigned x ) +{ +   return brw_swizzle(reg, x, x, x, x); +} + +static __inline struct brw_reg brw_writemask( struct brw_reg reg, +					      unsigned mask ) +{ +   reg.dw1.bits.writemask &= mask; +   return reg; +} + +static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, +						  unsigned mask ) +{ +   reg.dw1.bits.writemask = mask; +   return reg; +} + +static __inline struct brw_reg negate( struct brw_reg reg ) +{ +   reg.negate ^= 1; +   return reg; +} + +static __inline struct brw_reg brw_abs( struct brw_reg reg ) +{ +   reg.abs = 1; +   return reg; +} + 
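
A minimal sketch of how the register helpers above compose, using only definitions that already appear earlier in this header (brw_vec8_grf, get_element, brw_swizzle1, brw_imm_vf4 and the BRW_* encodings); the function name below is invented purely for illustration and is not part of the original patch:

static __inline void brw_eu_region_example( void )
{
   /* r2.0<8;8,1>:f -- a whole GRF register viewed as 8 floats */
   struct brw_reg v = brw_vec8_grf(2, 0);

   /* vec1(suboffset(v, 3)): byte offset 12 with region <0;1,0>,
    * i.e. the fourth float of r2 broadcast to every channel */
   struct brw_reg s = get_element(v, 3);

   /* the align16 equivalent is a swizzle: replicating W turns the
    * default XYZW swizzle (0xE4) into BRW_SWIZZLE4(3,3,3,3) == 0xFF */
   struct brw_reg w = brw_swizzle1(v, BRW_CHANNEL_W);

   /* an immediate (0.0, 0.0, 0.0, 1.0) vector in the packed 8-bit
    * restricted-float encoding consumed by brw_imm_vf4() */
   struct brw_reg one_w = brw_imm_vf4(VF_ZERO, VF_ZERO, VF_ZERO, VF_ONE);

   (void) s;
   (void) w;
   (void) one_w;
}
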
+/*********************************************************************** + */ +static __inline struct brw_reg brw_vec4_indirect( unsigned subnr, +						  int offset ) +{ +   struct brw_reg reg =  brw_vec4_grf(0, 0); +   reg.subnr = subnr; +   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; +   reg.dw1.bits.indirect_offset = offset; +   return reg; +} + +static __inline struct brw_reg brw_vec1_indirect( unsigned subnr, +						  int offset ) +{ +   struct brw_reg reg =  brw_vec1_grf(0, 0); +   reg.subnr = subnr; +   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; +   reg.dw1.bits.indirect_offset = offset; +   return reg; +} + +static __inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset) +{ +   return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ +   return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ +   return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ +   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ +   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ +   return brw_address_reg(ptr.addr_subnr); +} + +static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset ) +{ +   ptr.addr_offset += offset; +   return ptr; +} + +static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset ) +{ +   struct brw_indirect ptr; +   ptr.addr_subnr = addr_subnr; +   ptr.addr_offset = offset; +   ptr.pad = 0; +   return ptr; +} + +static __inline struct brw_instruction *current_insn( struct brw_compile *p) +{ +	return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, unsigned value ); +void brw_set_saturate( struct brw_compile *p, unsigned value ); +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ); +void brw_set_compression_control( struct brw_compile *p, boolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ); +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ); + +void brw_init_compile( struct brw_compile *p ); +const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); + + +struct brw_instruction *brw_alu1( struct brw_compile *p, +				  unsigned opcode, +				  struct brw_reg dest, +				  struct brw_reg src ); + +struct brw_instruction *brw_alu2(struct brw_compile *p, +				 unsigned opcode, +				 struct brw_reg dest, +				 struct brw_reg src0, +				 struct brw_reg src1 ); + +/* Helpers for regular instructions: + */ +#define ALU1(OP)					\ +struct brw_instruction *brw_##OP(struct brw_compile *p,	\ +	      struct brw_reg dest,			\ +	      struct brw_reg src0); + +#define ALU2(OP)					\ +struct brw_instruction *brw_##OP(struct brw_compile *p,	\ +	      struct brw_reg dest,			\ +	      struct brw_reg src0,			\ +	      struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) 
+ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, +		   struct brw_reg dest, +		   unsigned msg_reg_nr, +		   struct brw_reg src0, +		   boolean allocate, +		   boolean used, +		   unsigned msg_length, +		   unsigned response_length, +		   boolean eot, +		   boolean writes_complete, +		   unsigned offset, +		   unsigned swizzle); + +void brw_fb_WRITE(struct brw_compile *p, +		   struct brw_reg dest, +		   unsigned msg_reg_nr, +		   struct brw_reg src0, +		   unsigned binding_table_index, +		   unsigned msg_length, +		   unsigned response_length, +		   boolean eot); + +void brw_SAMPLE(struct brw_compile *p, +		struct brw_reg dest, +		unsigned msg_reg_nr, +		struct brw_reg src0, +		unsigned binding_table_index, +		unsigned sampler, +		unsigned writemask, +		unsigned msg_type, +		unsigned response_length, +		unsigned msg_length, +		boolean eot); + +void brw_math_16( struct brw_compile *p, +		  struct brw_reg dest, +		  unsigned function, +		  unsigned saturate, +		  unsigned msg_reg_nr, +		  struct brw_reg src, +		  unsigned precision ); + +void brw_math( struct brw_compile *p, +	       struct brw_reg dest, +	       unsigned function, +	       unsigned saturate, +	       unsigned msg_reg_nr, +	       struct brw_reg src, +	       unsigned data_type, +	       unsigned precision ); + +void brw_dp_READ_16( struct brw_compile *p, +		     struct brw_reg dest, +		     unsigned msg_reg_nr, +		     unsigned scratch_offset ); + +void brw_dp_WRITE_16( struct brw_compile *p, +		      struct brw_reg src, +		      unsigned msg_reg_nr, +		      unsigned scratch_offset ); + +/* If/else/endif.  Works by manipulating the execution flags on each + * channel. 
+ */ +struct brw_instruction *brw_IF(struct brw_compile *p, +			       unsigned execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, +				 struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, +	       struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, +			       unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, +	       struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, +		       struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, +	     struct brw_reg dest, +	     unsigned conditional, +	     struct brw_reg src0, +	     struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, +				   struct brw_indirect dst_ptr, +				   struct brw_indirect src_ptr, +				   unsigned count); + +void brw_copy_from_indirect(struct brw_compile *p, +			    struct brw_reg dst, +			    struct brw_indirect ptr, +			    unsigned count); + +void brw_copy4(struct brw_compile *p, +	       struct brw_reg dst, +	       struct brw_reg src, +	       unsigned count); + +void brw_copy8(struct brw_compile *p, +	       struct brw_reg dst, +	       struct brw_reg src, +	       unsigned count); + +void brw_math_invert( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, +                          struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c new file mode 100644 index 0000000000..4a94ddefa6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_debug.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +     + +#include "pipe/p_debug.h" + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ +   static const char *file[] = { +      "arf", +      "grf", +      "msg", +      "imm" +   }; + +   static const char *type[] = { +      "ud", +      "d", +      "uw", +      "w", +      "ub", +      "vf", +      "hf", +      "f" +   }; + +   debug_printf("%s%s",  +		hwreg.abs ? "abs/" : "", +		hwreg.negate ? "-" : ""); +      +   if (hwreg.file == BRW_GENERAL_REGISTER_FILE && +       hwreg.nr % 2 == 0 && +       hwreg.subnr == 0 && +       hwreg.vstride == BRW_VERTICAL_STRIDE_8 && +       hwreg.width == BRW_WIDTH_8 && +       hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && +       hwreg.type == BRW_REGISTER_TYPE_F) { +      debug_printf("vec%d", hwreg.nr); +   } +   else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && +	    hwreg.vstride == BRW_VERTICAL_STRIDE_0 && +	    hwreg.width == BRW_WIDTH_1 && +	    hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && +	    hwreg.type == BRW_REGISTER_TYPE_F) {       +      debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); +   } +   else { +      debug_printf("%s%d.%d<%d;%d,%d>:%s",  +		   file[hwreg.file], +		   hwreg.nr, +		   hwreg.subnr / type_sz(hwreg.type), +		   hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, +		   1<<hwreg.width, +		   hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,		 +		   type[hwreg.type]); +   } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_eu_emit.c b/src/gallium/drivers/i965simple/brw_eu_emit.c new file mode 100644 index 0000000000..400a80b6fb --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_emit.c @@ -0,0 +1,1080 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, +				  struct brw_reg reg ) +{ +   if (reg.width == BRW_WIDTH_8 && +       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) +      insn->header.execution_size = BRW_EXECUTE_16; +   else +      insn->header.execution_size = reg.width;	/* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, +			  struct brw_reg dest ) +{ +   insn->bits1.da1.dest_reg_file = dest.file; +   insn->bits1.da1.dest_reg_type = dest.type; +   insn->bits1.da1.dest_address_mode = dest.address_mode; + +   if (dest.address_mode == BRW_ADDRESS_DIRECT) { +      insn->bits1.da1.dest_reg_nr = dest.nr; + +      if (insn->header.access_mode == BRW_ALIGN_1) { +	 insn->bits1.da1.dest_subreg_nr = dest.subnr; +	 insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; +      } +      else { +	 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; +	 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; +      } +   } +   else { +      insn->bits1.ia1.dest_subreg_nr = dest.subnr; + +      /* These are different sizes in align1 vs align16: +       */ +      if (insn->header.access_mode == BRW_ALIGN_1) { +	 insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; +	 insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1; +      } +      else { +	 insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; +      } +   } + +   /* NEW: Set the execution size based on dest.width and +    * insn->compression_control: +    */ +   guess_execution_size(insn, dest); +} + +static void brw_set_src0( struct brw_instruction *insn, +		      struct brw_reg reg ) +{ +   assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + +   insn->bits1.da1.src0_reg_file = reg.file; +   insn->bits1.da1.src0_reg_type = reg.type; +   insn->bits2.da1.src0_abs = reg.abs; +   insn->bits2.da1.src0_negate = reg.negate; +   insn->bits2.da1.src0_address_mode = reg.address_mode; + +   if (reg.file == BRW_IMMEDIATE_VALUE) { +      insn->bits3.ud = reg.dw1.ud; + +      /* Required to set some fields in src1 as well: +       */ +      insn->bits1.da1.src1_reg_file = 0; /* arf */ +      insn->bits1.da1.src1_reg_type = reg.type; +   } +   else +   { +      if (reg.address_mode == BRW_ADDRESS_DIRECT) { +	 if (insn->header.access_mode == BRW_ALIGN_1) { +	    insn->bits2.da1.src0_subreg_nr = reg.subnr; +	    insn->bits2.da1.src0_reg_nr = reg.nr; +	 } +	 else { +	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; +	    insn->bits2.da16.src0_reg_nr = reg.nr; +	 } +      } +      else { +	 insn->bits2.ia1.src0_subreg_nr = reg.subnr; + +	 if (insn->header.access_mode == BRW_ALIGN_1) { +	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; +	 } +	 else { +	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; +	 } +      } + +      if (insn->header.access_mode == BRW_ALIGN_1) { +	 if (reg.width == BRW_WIDTH_1 && +	     insn->header.execution_size == BRW_EXECUTE_1) { +	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; +	    insn->bits2.da1.src0_width = BRW_WIDTH_1; +	    insn->bits2.da1.src0_vert_stride = 
BRW_VERTICAL_STRIDE_0; +	 } +	 else { +	    insn->bits2.da1.src0_horiz_stride = reg.hstride; +	    insn->bits2.da1.src0_width = reg.width; +	    insn->bits2.da1.src0_vert_stride = reg.vstride; +	 } +      } +      else { +	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); +	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); +	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); +	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + +	 /* This is an oddity of the fact we're using the same +	  * descriptions for registers in align_16 as align_1: +	  */ +	 if (reg.vstride == BRW_VERTICAL_STRIDE_8) +	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; +	 else +	    insn->bits2.da16.src0_vert_stride = reg.vstride; +      } +   } +} + + +void brw_set_src1( struct brw_instruction *insn, +			  struct brw_reg reg ) +{ +   assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + +   insn->bits1.da1.src1_reg_file = reg.file; +   insn->bits1.da1.src1_reg_type = reg.type; +   insn->bits3.da1.src1_abs = reg.abs; +   insn->bits3.da1.src1_negate = reg.negate; + +   /* Only src1 can be immediate in two-argument instructions. +    */ +   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + +   if (reg.file == BRW_IMMEDIATE_VALUE) { +      insn->bits3.ud = reg.dw1.ud; +   } +   else { +      /* This is a hardware restriction, which may or may not be lifted +       * in the future: +       */ +      assert (reg.address_mode == BRW_ADDRESS_DIRECT); +      //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + +      if (insn->header.access_mode == BRW_ALIGN_1) { +	 insn->bits3.da1.src1_subreg_nr = reg.subnr; +	 insn->bits3.da1.src1_reg_nr = reg.nr; +      } +      else { +	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; +	 insn->bits3.da16.src1_reg_nr = reg.nr; +      } + +      if (insn->header.access_mode == BRW_ALIGN_1) { +	 if (reg.width == BRW_WIDTH_1 && +	     insn->header.execution_size == BRW_EXECUTE_1) { +	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; +	    insn->bits3.da1.src1_width = BRW_WIDTH_1; +	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; +	 } +	 else { +	    insn->bits3.da1.src1_horiz_stride = reg.hstride; +	    insn->bits3.da1.src1_width = reg.width; +	    insn->bits3.da1.src1_vert_stride = reg.vstride; +	 } +      } +      else { +	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); +	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); +	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); +	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + +	 /* This is an oddity of the fact we're using the same +	  * descriptions for registers in align_16 as align_1: +	  */ +	 if (reg.vstride == BRW_VERTICAL_STRIDE_8) +	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; +	 else +	    insn->bits3.da16.src1_vert_stride = reg.vstride; +      } +   } +} + + + +static void brw_set_math_message( struct brw_instruction *insn, +				  unsigned msg_length, +				  unsigned response_length, +				  unsigned function, +				  unsigned integer_type, +				  boolean low_precision, +				  boolean saturate, +				  unsigned dataType ) +{ +   brw_set_src1(insn, brw_imm_d(0)); + +   insn->bits3.math.function = function; +   insn->bits3.math.int_type = integer_type; +   insn->bits3.math.precision = low_precision; +   
insn->bits3.math.saturate = saturate; +   insn->bits3.math.data_type = dataType; +   insn->bits3.math.response_length = response_length; +   insn->bits3.math.msg_length = msg_length; +   insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; +   insn->bits3.math.end_of_thread = 0; +} + +static void brw_set_urb_message( struct brw_instruction *insn, +				 boolean allocate, +				 boolean used, +				 unsigned msg_length, +				 unsigned response_length, +				 boolean end_of_thread, +				 boolean complete, +				 unsigned offset, +				 unsigned swizzle_control ) +{ +   brw_set_src1(insn, brw_imm_d(0)); + +   insn->bits3.urb.opcode = 0;	/* ? */ +   insn->bits3.urb.offset = offset; +   insn->bits3.urb.swizzle_control = swizzle_control; +   insn->bits3.urb.allocate = allocate; +   insn->bits3.urb.used = used;	/* ? */ +   insn->bits3.urb.complete = complete; +   insn->bits3.urb.response_length = response_length; +   insn->bits3.urb.msg_length = msg_length; +   insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; +   insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_write_message( struct brw_instruction *insn, +				      unsigned binding_table_index, +				      unsigned msg_control, +				      unsigned msg_type, +				      unsigned msg_length, +				      unsigned pixel_scoreboard_clear, +				      unsigned response_length, +				      unsigned end_of_thread ) +{ +   brw_set_src1(insn, brw_imm_d(0)); + +   insn->bits3.dp_write.binding_table_index = binding_table_index; +   insn->bits3.dp_write.msg_control = msg_control; +   insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; +   insn->bits3.dp_write.msg_type = msg_type; +   insn->bits3.dp_write.send_commit_msg = 0; +   insn->bits3.dp_write.response_length = response_length; +   insn->bits3.dp_write.msg_length = msg_length; +   insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; +   insn->bits3.urb.end_of_thread = end_of_thread; +} + +static void brw_set_dp_read_message( struct brw_instruction *insn, +				      unsigned binding_table_index, +				      unsigned msg_control, +				      unsigned msg_type, +				      unsigned target_cache, +				      unsigned msg_length, +				      unsigned response_length, +				      unsigned end_of_thread ) +{ +   brw_set_src1(insn, brw_imm_d(0)); + +   insn->bits3.dp_read.binding_table_index = binding_table_index; +   insn->bits3.dp_read.msg_control = msg_control; +   insn->bits3.dp_read.msg_type = msg_type; +   insn->bits3.dp_read.target_cache = target_cache; +   insn->bits3.dp_read.response_length = response_length; +   insn->bits3.dp_read.msg_length = msg_length; +   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; +   insn->bits3.dp_read.end_of_thread = end_of_thread; +} + +static void brw_set_sampler_message( struct brw_instruction *insn, +				     unsigned binding_table_index, +				     unsigned sampler, +				     unsigned msg_type, +				     unsigned response_length, +				     unsigned msg_length, +				     boolean eot) +{ +   brw_set_src1(insn, brw_imm_d(0)); + +   insn->bits3.sampler.binding_table_index = binding_table_index; +   insn->bits3.sampler.sampler = sampler; +   insn->bits3.sampler.msg_type = msg_type; +   insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; +   insn->bits3.sampler.response_length = response_length; +   insn->bits3.sampler.msg_length = msg_length; +   insn->bits3.sampler.end_of_thread = eot; +   insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; +} + + + +static struct 
brw_instruction *next_insn( struct brw_compile *p, +					  unsigned opcode ) +{ +   struct brw_instruction *insn; + +   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + +   insn = &p->store[p->nr_insn++]; +   memcpy(insn, p->current, sizeof(*insn)); + +   /* Reset this one-shot flag: +    */ + +   if (p->current->header.destreg__conditonalmod) { +      p->current->header.destreg__conditonalmod = 0; +      p->current->header.predicate_control = BRW_PREDICATE_NORMAL; +   } + +   insn->header.opcode = opcode; +   return insn; +} + + +struct brw_instruction *brw_alu1( struct brw_compile *p, +				  unsigned opcode, +				  struct brw_reg dest, +				  struct brw_reg src ) +{ +   struct brw_instruction *insn = next_insn(p, opcode); +   brw_set_dest(insn, dest); +   brw_set_src0(insn, src); +   return insn; +} + +struct brw_instruction *brw_alu2(struct brw_compile *p, +				 unsigned opcode, +				 struct brw_reg dest, +				 struct brw_reg src0, +				 struct brw_reg src1 ) +{ +   struct brw_instruction *insn = next_insn(p, opcode); +   brw_set_dest(insn, dest); +   brw_set_src0(insn, src0); +   brw_set_src1(insn, src1); +   return insn; +} + + +/*********************************************************************** + * Convenience routines. + */ +#define ALU1(OP)					\ +struct brw_instruction *brw_##OP(struct brw_compile *p,			\ +	      struct brw_reg dest,			\ +	      struct brw_reg src0)   			\ +{							\ +   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\ +} + +#define ALU2(OP)					\ +struct brw_instruction *brw_##OP(struct brw_compile *p,			\ +	      struct brw_reg dest,			\ +	      struct brw_reg src0,			\ +	      struct brw_reg src1)   			\ +{							\ +   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ +   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); +   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); +   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); +   brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, +	      struct brw_reg dest, +	      struct brw_reg src0, +	      struct brw_reg src1) +{ +   struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + +   p->current->header.predicate_control = BRW_PREDICATE_NONE; + +   return insn; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack).  Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off.  If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). 
+ */ +struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size) +{ +   struct brw_instruction *insn; + +   if (p->single_program_flow) { +      assert(execute_size == BRW_EXECUTE_1); + +      insn = next_insn(p, BRW_OPCODE_ADD); +      insn->header.predicate_inverse = 1; +   } else { +      insn = next_insn(p, BRW_OPCODE_IF); +   } + +   /* Override the defaults for this instruction: +    */ +   brw_set_dest(insn, brw_ip_reg()); +   brw_set_src0(insn, brw_ip_reg()); +   brw_set_src1(insn, brw_imm_d(0x0)); + +   insn->header.execution_size = execute_size; +   insn->header.compression_control = BRW_COMPRESSION_NONE; +   insn->header.predicate_control = BRW_PREDICATE_NORMAL; +   insn->header.mask_control = BRW_MASK_ENABLE; + +   p->current->header.predicate_control = BRW_PREDICATE_NONE; + +   return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, +				 struct brw_instruction *if_insn) +{ +   struct brw_instruction *insn; + +   if (p->single_program_flow) { +      insn = next_insn(p, BRW_OPCODE_ADD); +   } else { +      insn = next_insn(p, BRW_OPCODE_ELSE); +   } + +   brw_set_dest(insn, brw_ip_reg()); +   brw_set_src0(insn, brw_ip_reg()); +   brw_set_src1(insn, brw_imm_d(0x0)); + +   insn->header.compression_control = BRW_COMPRESSION_NONE; +   insn->header.execution_size = if_insn->header.execution_size; +   insn->header.mask_control = BRW_MASK_ENABLE; + +   /* Patch the if instruction to point at this instruction. +    */ +   if (p->single_program_flow) { +      assert(if_insn->header.opcode == BRW_OPCODE_ADD); + +      if_insn->bits3.ud = (insn - if_insn + 1) * 16; +   } else { +      assert(if_insn->header.opcode == BRW_OPCODE_IF); + +      if_insn->bits3.if_else.jump_count = insn - if_insn; +      if_insn->bits3.if_else.pop_count = 1; +      if_insn->bits3.if_else.pad0 = 0; +   } + +   return insn; +} + +void brw_ENDIF(struct brw_compile *p, +	       struct brw_instruction *patch_insn) +{ +   if (p->single_program_flow) { +      /* In single program flow mode, there's no need to execute an ENDIF, +       * since we don't need to do any stack operations, and if we're executing +       * currently, we want to just continue executing. +       */ +      struct brw_instruction *next = &p->store[p->nr_insn]; + +      assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + +      patch_insn->bits3.ud = (next - patch_insn) * 16; +   } else { +      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + +      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); +      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); +      brw_set_src1(insn, brw_imm_d(0x0)); + +      insn->header.compression_control = BRW_COMPRESSION_NONE; +      insn->header.execution_size = patch_insn->header.execution_size; +      insn->header.mask_control = BRW_MASK_ENABLE; + +      assert(patch_insn->bits3.if_else.jump_count == 0); + +      /* Patch the if or else instructions to point at this or the next +       * instruction respectively. 
+       */ +      if (patch_insn->header.opcode == BRW_OPCODE_IF) { +	 /* Automagically turn it into an IFF: +	  */ +	 patch_insn->header.opcode = BRW_OPCODE_IFF; +	 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; +	 patch_insn->bits3.if_else.pop_count = 0; +	 patch_insn->bits3.if_else.pad0 = 0; +      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { +	 patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; +	 patch_insn->bits3.if_else.pop_count = 1; +	 patch_insn->bits3.if_else.pad0 = 0; +      } else { +	 assert(0); +      } + +      /* Also pop item off the stack in the endif instruction: +       */ +      insn->bits3.if_else.jump_count = 0; +      insn->bits3.if_else.pop_count = 1; +      insn->bits3.if_else.pad0 = 0; +   } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ +   struct brw_instruction *insn; +   insn = next_insn(p, BRW_OPCODE_BREAK); +   brw_set_dest(insn, brw_ip_reg()); +   brw_set_src0(insn, brw_ip_reg()); +   brw_set_src1(insn, brw_imm_d(0x0)); +   insn->header.compression_control = BRW_COMPRESSION_NONE; +   insn->header.execution_size = BRW_EXECUTE_8; +   insn->header.mask_control = BRW_MASK_DISABLE; +   insn->bits3.if_else.pad0 = 0; +   return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ +   struct brw_instruction *insn; +   insn = next_insn(p, BRW_OPCODE_CONTINUE); +   brw_set_dest(insn, brw_ip_reg()); +   brw_set_src0(insn, brw_ip_reg()); +   brw_set_src1(insn, brw_imm_d(0x0)); +   insn->header.compression_control = BRW_COMPRESSION_NONE; +   insn->header.execution_size = BRW_EXECUTE_8; +   insn->header.mask_control = BRW_MASK_DISABLE; +   insn->bits3.if_else.pad0 = 0; +   return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ +   if (p->single_program_flow) { +      return &p->store[p->nr_insn]; +   } else { +      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + +      /* Override the defaults for this instruction: +       */ +      brw_set_dest(insn, brw_null_reg()); +      brw_set_src0(insn, brw_null_reg()); +      brw_set_src1(insn, brw_null_reg()); + +      insn->header.compression_control = BRW_COMPRESSION_NONE; +      insn->header.execution_size = execute_size; +      insn->header.predicate_control = BRW_PREDICATE_NONE; +      /* insn->header.mask_control = BRW_MASK_ENABLE; */ +      insn->header.mask_control = BRW_MASK_DISABLE; + +      return insn; +   } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, +	       struct brw_instruction *do_insn) +{ +   struct brw_instruction *insn; + +   if (p->single_program_flow) +      insn = next_insn(p, BRW_OPCODE_ADD); +   else +      insn = next_insn(p, BRW_OPCODE_WHILE); + +   brw_set_dest(insn, brw_ip_reg()); +   brw_set_src0(insn, brw_ip_reg()); +   brw_set_src1(insn, brw_imm_d(0x0)); + +   insn->header.compression_control = BRW_COMPRESSION_NONE; + +   if (p->single_program_flow) { +      insn->header.execution_size = BRW_EXECUTE_1; + +      insn->bits3.d = (do_insn - insn) * 16; +   } else { +      insn->header.execution_size = do_insn->header.execution_size; + +      assert(do_insn->header.opcode == BRW_OPCODE_DO); +      insn->bits3.if_else.jump_count = do_insn - insn; +      insn->bits3.if_else.pop_count = 0; +      insn->bits3.if_else.pad0 = 0; +   } + +/*    insn->header.mask_control = BRW_MASK_ENABLE; */ + +   insn->header.mask_control = BRW_MASK_DISABLE; +   p->current->header.predicate_control = BRW_PREDICATE_NONE; +   return insn; +} + 
+ +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, +		       struct brw_instruction *jmp_insn) +{ +   struct brw_instruction *landing = &p->store[p->nr_insn]; + +   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); +   assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + +   jmp_insn->bits3.ud = (landing - jmp_insn) - 1; +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register.  It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, +	     struct brw_reg dest, +	     unsigned conditional, +	     struct brw_reg src0, +	     struct brw_reg src1) +{ +   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + +   insn->header.destreg__conditonalmod = conditional; +   brw_set_dest(insn, dest); +   brw_set_src0(insn, src0); +   brw_set_src1(insn, src1); + +/*    guess_execution_size(insn, src0); */ + + +   /* Make it so that future instructions will use the computed flag +    * value until brw_set_predicate_control_flag_value() is called +    * again. +    */ +   if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && +       dest.nr == 0) { +      p->current->header.predicate_control = BRW_PREDICATE_NORMAL; +      p->flag_value = 0xff; +   } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/* Invert 8 values + */ +void brw_math( struct brw_compile *p, +	       struct brw_reg dest, +	       unsigned function, +	       unsigned saturate, +	       unsigned msg_reg_nr, +	       struct brw_reg src, +	       unsigned data_type, +	       unsigned precision ) +{ +   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); +   unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; +   unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + +   /* Example code doesn't set predicate_control for send +    * instructions. +    */ +   insn->header.predicate_control = 0; +   insn->header.destreg__conditonalmod = msg_reg_nr; + +   brw_set_dest(insn, dest); +   brw_set_src0(insn, src); +   brw_set_math_message(insn, +			msg_length, response_length, +			function, +			BRW_MATH_INTEGER_UNSIGNED, +			precision, +			saturate, +			data_type); +} + +/* Use 2 send instructions to invert 16 elements + */ +void brw_math_16( struct brw_compile *p, +		  struct brw_reg dest, +		  unsigned function, +		  unsigned saturate, +		  unsigned msg_reg_nr, +		  struct brw_reg src, +		  unsigned precision ) +{ +   struct brw_instruction *insn; +   unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; +   unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 
2 : 1; + +   /* First instruction: +    */ +   brw_push_insn_state(p); +   brw_set_predicate_control_flag_value(p, 0xff); +   brw_set_compression_control(p, BRW_COMPRESSION_NONE); + +   insn = next_insn(p, BRW_OPCODE_SEND); +   insn->header.destreg__conditonalmod = msg_reg_nr; + +   brw_set_dest(insn, dest); +   brw_set_src0(insn, src); +   brw_set_math_message(insn, +			msg_length, response_length, +			function, +			BRW_MATH_INTEGER_UNSIGNED, +			precision, +			saturate, +			BRW_MATH_DATA_VECTOR); + +   /* Second instruction: +    */ +   insn = next_insn(p, BRW_OPCODE_SEND); +   insn->header.compression_control = BRW_COMPRESSION_2NDHALF; +   insn->header.destreg__conditonalmod = msg_reg_nr+1; + +   brw_set_dest(insn, offset(dest,1)); +   brw_set_src0(insn, src); +   brw_set_math_message(insn, +			msg_length, response_length, +			function, +			BRW_MATH_INTEGER_UNSIGNED, +			precision, +			saturate, +			BRW_MATH_DATA_VECTOR); + +   brw_pop_insn_state(p); +} + + + + +void brw_dp_WRITE_16( struct brw_compile *p, +		      struct brw_reg src, +		      unsigned msg_reg_nr, +		      unsigned scratch_offset ) +{ +   { +      brw_push_insn_state(p); +      brw_set_mask_control(p, BRW_MASK_DISABLE); +      brw_set_compression_control(p, BRW_COMPRESSION_NONE); + +      brw_MOV(p, +	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), +	      brw_imm_d(scratch_offset)); + +      brw_pop_insn_state(p); +   } + +   { +      unsigned msg_length = 3; +      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); +      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + +      insn->header.predicate_control = 0; /* XXX */ +      insn->header.compression_control = BRW_COMPRESSION_NONE; +      insn->header.destreg__conditonalmod = msg_reg_nr; + +      brw_set_dest(insn, dest); +      brw_set_src0(insn, src); + +      brw_set_dp_write_message(insn, +			       255, /* bti */ +			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ +			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ +			       msg_length, +			       0, /* pixel scoreboard */ +			       0, /* response_length */ +			       0); /* eot */ +   } + +} + + +void brw_dp_READ_16( struct brw_compile *p, +		      struct brw_reg dest, +		      unsigned msg_reg_nr, +		      unsigned scratch_offset ) +{ +   { +      brw_push_insn_state(p); +      brw_set_compression_control(p, BRW_COMPRESSION_NONE); +      brw_set_mask_control(p, BRW_MASK_DISABLE); + +      brw_MOV(p, +	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), +	      brw_imm_d(scratch_offset)); + +      brw_pop_insn_state(p); +   } + +   { +      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + +      insn->header.predicate_control = 0; /* XXX */ +      insn->header.compression_control = BRW_COMPRESSION_NONE; +      insn->header.destreg__conditonalmod = msg_reg_nr; + +      brw_set_dest(insn, dest);	/* UW? 
*/ +      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + +      brw_set_dp_read_message(insn, +			      255, /* bti */ +			      3,  /* msg_control */ +			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ +			      1, /* target cache */ +			      1, /* msg_length */ +			      2, /* response_length */ +			      0); /* eot */ +   } +} + + +void brw_fb_WRITE(struct brw_compile *p, +		   struct brw_reg dest, +		   unsigned msg_reg_nr, +		   struct brw_reg src0, +		   unsigned binding_table_index, +		   unsigned msg_length, +		   unsigned response_length, +		   boolean eot) +{ +   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + +   insn->header.predicate_control = 0; /* XXX */ +   insn->header.compression_control = BRW_COMPRESSION_NONE; +   insn->header.destreg__conditonalmod = msg_reg_nr; + +   brw_set_dest(insn, dest); +   brw_set_src0(insn, src0); +   brw_set_dp_write_message(insn, +			    binding_table_index, +			    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ +			    BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ +			    msg_length, +			    1,	/* pixel scoreboard */ +			    response_length, +			    eot); +} + + + +void brw_SAMPLE(struct brw_compile *p, +		struct brw_reg dest, +		unsigned msg_reg_nr, +		struct brw_reg src0, +		unsigned binding_table_index, +		unsigned sampler, +		unsigned writemask, +		unsigned msg_type, +		unsigned response_length, +		unsigned msg_length, +		boolean eot) +{ +   boolean need_stall = 0; + +   if(writemask == 0) { +/*       debug_printf("%s: zero writemask??\n", __FUNCTION__); */ +      return; +   } + +   /* Hardware doesn't do destination dependency checking on send +    * instructions properly.  Add a workaround which generates the +    * dependency by other means.  In practice it seems like this bug +    * only crops up for texture samples, and only where registers are +    * written by the send and then written again later without being +    * read in between.  Luckily for us, we already track that +    * information and use it to modify the writemask for the +    * instruction, so that is a guide for whether a workaround is +    * needed. 
+    */ +   if (writemask != TGSI_WRITEMASK_XYZW) { +      unsigned dst_offset = 0; +      unsigned i, newmask = 0, len = 0; + +      for (i = 0; i < 4; i++) { +	 if (writemask & (1<<i)) +	    break; +	 dst_offset += 2; +      } +      for (; i < 4; i++) { +	 if (!(writemask & (1<<i))) +	    break; +	 newmask |= 1<<i; +	 len++; +      } + +      if (newmask != writemask) { +	 need_stall = 1; +/* 	 debug_printf("need stall %x %x\n", newmask , writemask); */ +      } +      else { +	 struct brw_reg m1 = brw_message_reg(msg_reg_nr); + +	 newmask = ~newmask & TGSI_WRITEMASK_XYZW; + +	 brw_push_insn_state(p); + +	 brw_set_compression_control(p, BRW_COMPRESSION_NONE); +	 brw_set_mask_control(p, BRW_MASK_DISABLE); + +	 brw_MOV(p, m1, brw_vec8_grf(0,0)); +  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + +	 brw_pop_insn_state(p); + +  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); +	 dest = offset(dest, dst_offset); +	 response_length = len * 2; +      } +   } + +   { +      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + +      insn->header.predicate_control = 0; /* XXX */ +      insn->header.compression_control = BRW_COMPRESSION_NONE; +      insn->header.destreg__conditonalmod = msg_reg_nr; + +      brw_set_dest(insn, dest); +      brw_set_src0(insn, src0); +      brw_set_sampler_message(insn, +			      binding_table_index, +			      sampler, +			      msg_type, +			      response_length, +			      msg_length, +			      eot); +   } + +   if (need_stall) +   { +      struct brw_reg reg = vec8(offset(dest, response_length-1)); + +      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 } +       */ +      brw_push_insn_state(p); +      brw_set_compression_control(p, FALSE); +      brw_MOV(p, reg, reg); +      brw_pop_insn_state(p); +   } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style.  Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, +		   struct brw_reg dest, +		   unsigned msg_reg_nr, +		   struct brw_reg src0, +		   boolean allocate, +		   boolean used, +		   unsigned msg_length, +		   unsigned response_length, +		   boolean eot, +		   boolean writes_complete, +		   unsigned offset, +		   unsigned swizzle) +{ +   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + +   assert(msg_length < 16); + +   brw_set_dest(insn, dest); +   brw_set_src0(insn, src0); +   brw_set_src1(insn, brw_imm_d(0)); + +   insn->header.destreg__conditonalmod = msg_reg_nr; + +   brw_set_urb_message(insn, +		       allocate, +		       used, +		       msg_length, +		       response_length, +		       eot, +		       writes_complete, +		       offset, +		       swizzle); +} + diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c new file mode 100644 index 0000000000..3a65b141f0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +       + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p,  +			     struct brw_reg dst, +			     struct brw_reg src) +{ +   brw_math( p,  +	     dst, +	     BRW_MATH_FUNCTION_INV,  +	     BRW_MATH_SATURATE_NONE, +	     0, +	     src, +	     BRW_MATH_PRECISION_FULL,  +	     BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, +	       struct brw_reg dst, +	       struct brw_reg src, +	       unsigned count) +{ +   unsigned i; + +   dst = vec4(dst); +   src = vec4(src); + +   for (i = 0; i < count; i++) +   { +      unsigned delta = i*32; +      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta)); +      brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); +   } +} + + +void brw_copy8(struct brw_compile *p, +	       struct brw_reg dst, +	       struct brw_reg src, +	       unsigned count) +{ +   unsigned i; + +   dst = vec8(dst); +   src = vec8(src); + +   for (i = 0; i < count; i++) +   { +      unsigned delta = i*32; +      brw_MOV(p, byte_offset(dst, delta),    byte_offset(src, delta)); +   } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, +				   struct brw_indirect dst_ptr, +				   struct brw_indirect src_ptr, +				   unsigned count) +{ +   unsigned i; + +   for (i = 0; i < count; i++) +   { +      unsigned delta = i*32; +      brw_MOV(p, deref_4f(dst_ptr, delta),    deref_4f(src_ptr, delta)); +      brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); +   } +} + + +void brw_copy_from_indirect(struct brw_compile *p, +			    struct brw_reg dst, +			    struct brw_indirect ptr, +			    unsigned count) +{ +   unsigned i; + +   dst = vec4(dst); + +   for (i = 0; i < count; i++) +   { +      unsigned delta = i*32; +      brw_MOV(p, byte_offset(dst, delta),    deref_4f(ptr, delta)); +      brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); +   } +} + + + + diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c new file mode 100644 index 0000000000..e6001c30d9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_flush.c @@ -0,0 +1,73 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten 
Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Author: + *    Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_batch.h" + + +static void brw_flush( struct pipe_context *pipe, +                       unsigned flags, +                       struct pipe_fence_handle **fence ) +{ +   struct brw_context *brw = brw_context(pipe); + +   /* Do we need to emit an MI_FLUSH command to flush the hardware +    * caches? +    */ +   if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { +      struct brw_mi_flush flush; + +      memset(&flush, 0, sizeof(flush));       +      flush.opcode = CMD_MI_FLUSH; + +      if (!(flags & PIPE_FLUSH_RENDER_CACHE)) +	 flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE; + +      if (flags & PIPE_FLUSH_TEXTURE_CACHE) +	 flush.flags |= BRW_FLUSH_READ_CACHE; + +      BRW_BATCH_STRUCT(brw, &flush); +   } + +   /* If there are no flags, just flush pending commands to hardware: +    */ +   FLUSH_BATCH( fence ); +} + + + +void brw_init_flush_functions( struct brw_context *brw ) +{ +   brw->pipe.flush = brw_flush; +} diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c new file mode 100644 index 0000000000..de60868ccc --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.c @@ -0,0 +1,196 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +static void compile_gs_prog( struct brw_context *brw, +			     struct brw_gs_prog_key *key ) +{ +   struct brw_gs_compile c; +   const unsigned *program; +   unsigned program_size; + +   memset(&c, 0, sizeof(c)); + +   c.key = *key; + +   /* Need to locate the two positions present in vertex + header. +    * These are currently hardcoded: +    */ +   c.nr_attrs = brw_count_bits(c.key.attrs); +   c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */ +   c.nr_bytes = c.nr_regs * REG_SIZE; + + +   /* Begin the compilation: +    */ +   brw_init_compile(&c.func); + +   c.func.single_program_flow = 1; + +   /* For some reason the thread is spawned with only 4 channels +    * unmasked. +    */ +   brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + +   /* Note that primitives which don't require a GS program have +    * already been weeded out by this stage: +    */ +   switch (key->primitive) { +   case PIPE_PRIM_QUADS: +      brw_gs_quads( &c ); +      break; +   case PIPE_PRIM_QUAD_STRIP: +      brw_gs_quad_strip( &c ); +      break; +   case PIPE_PRIM_LINE_LOOP: +      brw_gs_lines( &c ); +      break; +   case PIPE_PRIM_LINES: +      if (key->hint_gs_always) +	 brw_gs_lines( &c ); +      else { +	 return; +      } +      break; +   case PIPE_PRIM_TRIANGLES: +      if (key->hint_gs_always) +	 brw_gs_tris( &c ); +      else { +	 return; +      } +      break; +   case PIPE_PRIM_POINTS: +      if (key->hint_gs_always) +	 brw_gs_points( &c ); +      else { +	 return; +      } +      break; +   default: +      return; +   } + +   /* get the program +    */ +   program = brw_get_program(&c.func, &program_size); + +   /* Upload +    */ +   brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], +					      &c.key, +					      sizeof(c.key), +					      program, +					      program_size, +					      &c.prog_data, +					      &brw->gs.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, +			       struct brw_gs_prog_key *key ) +{ +   return brw_search_cache(&brw->cache[BRW_GS_PROG], +			   key, sizeof(*key), +			   &brw->gs.prog_data, +			   &brw->gs.prog_gs_offset); +} + + +static const int gs_prim[PIPE_PRIM_POLYGON+1] = { +   PIPE_PRIM_POINTS, +   PIPE_PRIM_LINES, +   PIPE_PRIM_LINE_LOOP, +   PIPE_PRIM_LINES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_QUADS, +   PIPE_PRIM_QUAD_STRIP, +   PIPE_PRIM_TRIANGLES +}; + +static void populate_key( struct brw_context *brw, +			  struct brw_gs_prog_key *key ) +{ +   memset(key, 0, sizeof(*key)); + +   /* CACHE_NEW_VS_PROG */ +   key->attrs = brw->vs.prog_data->outputs_written; + +   /* BRW_NEW_PRIMITIVE */ +   key->primitive = gs_prim[brw->primitive]; + +   
key->hint_gs_always = 0;	/* debug code? */ + +   key->need_gs_prog = (key->hint_gs_always || +			brw->primitive == PIPE_PRIM_QUADS || +			brw->primitive == PIPE_PRIM_QUAD_STRIP || +			brw->primitive == PIPE_PRIM_LINE_LOOP); +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_gs_prog( struct brw_context *brw ) +{ +   struct brw_gs_prog_key key; + +   /* Populate the key: +    */ +   populate_key(brw, &key); + +   if (brw->gs.prog_active != key.need_gs_prog) { +      brw->state.dirty.cache |= CACHE_NEW_GS_PROG; +      brw->gs.prog_active = key.need_gs_prog; +   } + +   if (brw->gs.prog_active) { +      if (!search_cache(brw, &key)) +	 compile_gs_prog( brw, &key ); +   } +} + + +const struct brw_tracked_state brw_gs_prog = { +   .dirty = { +      .brw   = BRW_NEW_PRIMITIVE, +      .cache = CACHE_NEW_VS_PROG +   }, +   .update = upload_gs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h new file mode 100644 index 0000000000..f09141c6aa --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.h @@ -0,0 +1,75 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +  + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4)	      + +struct brw_gs_prog_key { +   unsigned attrs:32; +   unsigned primitive:4; +   unsigned hint_gs_always:1; +   unsigned need_gs_prog:1; +   unsigned pad:26; +}; + +struct brw_gs_compile { +   struct brw_compile func; +   struct brw_gs_prog_key key; +   struct brw_gs_prog_data prog_data; +    +   struct { +      struct brw_reg R0; +      struct brw_reg vertex[MAX_GS_VERTS]; +   } reg; + +   /* 3 different ways of expressing vertex size: +    */ +   unsigned nr_attrs; +   unsigned nr_regs; +   unsigned nr_bytes; +}; + +#define ATTR_SIZE  (4*4) + +void brw_gs_quads( struct brw_gs_compile *c ); +void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_tris( struct brw_gs_compile *c ); +void brw_gs_lines( struct brw_gs_compile *c ); +void brw_gs_points( struct brw_gs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c new file mode 100644 index 0000000000..c3cc90b10f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_emit.c @@ -0,0 +1,148 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +static void brw_gs_alloc_regs( struct brw_gs_compile *c, +			       unsigned nr_verts ) +{ +   unsigned i = 0,j; + +   /* Register usage is static, precompute here: +    */ +   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + +   /* Payload vertices plus space for more generated vertices: +    */ +   for (j = 0; j < nr_verts; j++) { +      c->reg.vertex[j] = brw_vec4_grf(i, 0); +      i += c->nr_regs; +   } + +   c->prog_data.urb_read_length = c->nr_regs;  +   c->prog_data.total_grf = i; +} + + +static void brw_gs_emit_vue(struct brw_gs_compile *c,  +			    struct brw_reg vert, +			    boolean last, +			    unsigned header) +{ +   struct brw_compile *p = &c->func; +   boolean allocate = !last; + +   /* Overwrite PrimType and PrimStart in the message header, for +    * each vertex in turn: +    */ +   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + +   /* Copy the vertex from vertn into m1..mN+1: +    */ +   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + +   /* Send each vertex as a seperate write to the urb.  This is +    * different to the concept in brw_sf_emit.c, where subsequent +    * writes are used to build up a single urb entry.  Each of these +    * writes instantiates a seperate urb entry, and a new one must be +    * allocated each time. +    */ +   brw_urb_WRITE(p,  +		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), +		 0, +		 c->reg.R0, +		 allocate, +		 1,		/* used */ +		 c->nr_regs + 1, /* msg length */ +		 allocate ? 1 : 0, /* response length */ +		 allocate ? 0 : 1, /* eot */ +		 1,		/* writes_complete */ +		 0,		/* urb offset */ +		 BRW_URB_SWIZZLE_NONE); +} + + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ +   brw_gs_alloc_regs(c, 4); +    +   /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 +    * is the PV for quads, but vertex 0 for polygons: +    */ +   brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); +   brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); +   brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));  +   brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_quad_strip( struct brw_gs_compile *c ) +{ +   brw_gs_alloc_regs(c, 4); +    +   brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); +   brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); +   brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));  +   brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_tris( struct brw_gs_compile *c ) +{ +   brw_gs_alloc_regs(c, 3); +   brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); +   brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); +   brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +void brw_gs_lines( struct brw_gs_compile *c ) +{ +   brw_gs_alloc_regs(c, 2); +   brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); +   brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +void brw_gs_points( struct brw_gs_compile *c ) +{ +   brw_gs_alloc_regs(c, 1); +   brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c new file mode 100644 index 0000000000..5b8016b2e9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
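/* Illustrative sketch (not driver code): the vertex-header encoding used by
 * the emitters above.  Each re-emitted vertex carries the output primitive
 * type shifted left by two, OR'd with start/end flags.  The constants below
 * are placeholders standing in for the real brw_defines.h values.
 */
#include <stdio.h>

#define PRIM_TYPE_POLYGON 3u        /* placeholder for _3DPRIM_POLYGON */
#define PRIM_START        (1u << 0) /* placeholder for R02_PRIM_START  */
#define PRIM_END          (1u << 1) /* placeholder for R02_PRIM_END    */

static unsigned vert_header(unsigned prim_type, int first, int last)
{
   unsigned h = prim_type << 2;
   if (first) h |= PRIM_START;
   if (last)  h |= PRIM_END;
   return h;
}

int main(void)
{
   /* Quad case above: emit vertices in the order 3, 0, 1, 2 as a polygon. */
   static const int order[4] = { 3, 0, 1, 2 };
   for (int i = 0; i < 4; i++)
      printf("vert %d: header 0x%x\n", order[i],
             vert_header(PRIM_TYPE_POLYGON, i == 0, i == 3));
   return 0;
}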
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + + +static void upload_gs_unit( struct brw_context *brw ) +{ +   struct brw_gs_unit_state gs; + +   memset(&gs, 0, sizeof(gs)); + +   /* CACHE_NEW_GS_PROG */ +   if (brw->gs.prog_active) { +      gs.thread0.grf_reg_count = +	 align(brw->gs.prog_data->total_grf, 16) / 16 - 1; +      gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; +      gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; +   } +   else { +      gs.thread0.grf_reg_count = 0; +      gs.thread0.kernel_start_pointer = 0; +      gs.thread3.urb_entry_read_length = 1; +   } + +   /* BRW_NEW_URB_FENCE */ +   gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; +   gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + +   gs.thread4.max_threads = 0; /* Hardware requirement */ + +   if (BRW_DEBUG & DEBUG_STATS) +      gs.thread4.stats_enable = 1; + +   /* CONSTANT */ +   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; +   gs.thread1.single_program_flow = 1; +   gs.thread3.dispatch_grf_start_reg = 1; +   gs.thread3.const_urb_entry_read_offset = 0; +   gs.thread3.const_urb_entry_read_length = 0; +   gs.thread3.urb_entry_read_offset = 0; + + +   brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); +} + + +const struct brw_tracked_state brw_gs_unit = { +   .dirty = { +      .brw   = (BRW_NEW_CURBE_OFFSETS | +		BRW_NEW_URB_FENCE), +      .cache = CACHE_NEW_GS_PROG +   }, +   .update = upload_gs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c new file mode 100644 index 0000000000..99ff4403a5 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -0,0 +1,488 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
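/* Illustrative sketch (not driver code): the two field encodings used by
 * upload_gs_unit() above.  The GRF count is stored in units of 16 registers
 * minus one, and the kernel start pointer is a 64-byte-aligned offset with
 * the low six bits dropped.
 */
#include <assert.h>

static unsigned align_up(unsigned value, unsigned alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

static unsigned grf_reg_count_field(unsigned total_grf)
{
   return align_up(total_grf, 16) / 16 - 1;
}

static unsigned kernel_start_pointer_field(unsigned prog_offset)
{
   assert((prog_offset & 63) == 0);  /* program must start on a 64-byte boundary */
   return prog_offset >> 6;
}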
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_batch.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + + + + +/*********************************************************************** + * Blend color + */ + +static void upload_blend_constant_color(struct brw_context *brw) +{ +   struct brw_blend_constant_color bcc; + +   memset(&bcc, 0, sizeof(bcc)); +   bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; +   bcc.header.length = sizeof(bcc)/4-2; +   bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0]; +   bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1]; +   bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2]; +   bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3]; + +   BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + + +const struct brw_tracked_state brw_blend_constant_color = { +   .dirty = { +      .brw = BRW_NEW_BLEND, +      .cache = 0 +   }, +   .update = upload_blend_constant_color +}; + + +/*********************************************************************** + * Drawing rectangle  + */ +static void upload_drawing_rect(struct brw_context *brw) +{ +   struct brw_drawrect bdr; + +   memset(&bdr, 0, sizeof(bdr)); +   bdr.header.opcode = CMD_DRAW_RECT; +   bdr.header.length = sizeof(bdr)/4 - 2; +   bdr.xmin = 0; +   bdr.ymin = 0; +   bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width; +   bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height; +   bdr.xorg = 0; +   bdr.yorg = 0; + +   /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted +    * uncached in brw_draw.c: +    */ +   BRW_BATCH_STRUCT(brw, &bdr); +} + +const struct brw_tracked_state brw_drawing_rect = { +   .dirty = { +      .brw = BRW_NEW_SCENE, +      .cache = 0 +   }, +   .update = upload_drawing_rect +}; + +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. + * + * The binding table pointers are relative to the surface state base address, + * which is the BRW_SS_POOL cache buffer. + */ +static void upload_binding_table_pointers(struct brw_context *brw) +{ +   struct brw_binding_table_pointers btp; +   memset(&btp, 0, sizeof(btp)); + +   btp.header.opcode = CMD_BINDING_TABLE_PTRS; +   btp.header.length = sizeof(btp)/4 - 2; +   btp.vs = 0; +   btp.gs = 0; +   btp.clp = 0; +   btp.sf = 0; +   btp.wm = brw->wm.bind_ss_offset; + +   BRW_CACHED_BATCH_STRUCT(brw, &btp); +} + +const struct brw_tracked_state brw_binding_table_pointers = { +   .dirty = { +      .brw = 0, +      .cache = CACHE_NEW_SURF_BIND +   }, +   .update = upload_binding_table_pointers, +}; + + +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. + */ +static void upload_pipelined_state_pointers(struct brw_context *brw ) +{ +   struct brw_pipelined_state_pointers psp; +   memset(&psp, 0, sizeof(psp)); + +   psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; +   psp.header.length = sizeof(psp)/4 - 2; + +   psp.vs.offset = brw->vs.state_gs_offset >> 5; +   psp.sf.offset = brw->sf.state_gs_offset >> 5; +   psp.wm.offset = brw->wm.state_gs_offset >> 5; +   psp.cc.offset = brw->cc.state_gs_offset >> 5; + +   /* GS gets turned on and off regularly.  Need to re-emit URB fence +    * after this occurs. 
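/* Illustrative sketch (not driver code): the header convention shared by the
 * state packets in this file.  The hardware length field is the total packet
 * size in DWORDs minus two, which is where the recurring
 * sizeof(pkt)/4 - 2 expression comes from.
 */
#include <stdint.h>
#include <stddef.h>

static uint32_t cmd_length_field(size_t packet_size_in_bytes)
{
   return (uint32_t)(packet_size_in_bytes / 4 - 2);
}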
+    */ +   if (brw->gs.prog_active) { +      psp.gs.offset = brw->gs.state_gs_offset >> 5; +      psp.gs.enable = 1; +   } + +   if (0) { +      psp.clp.offset = brw->clip.state_gs_offset >> 5; +      psp.clp.enable = 1; +   } + + +   if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) +      brw->state.dirty.brw |= BRW_NEW_PSP; +} + +const struct brw_tracked_state brw_pipelined_state_pointers = { +   .dirty = { +      .brw = 0, +      .cache = (CACHE_NEW_VS_UNIT | +		CACHE_NEW_GS_UNIT | +		CACHE_NEW_GS_PROG | +		CACHE_NEW_CLIP_UNIT | +		CACHE_NEW_SF_UNIT | +		CACHE_NEW_WM_UNIT | +		CACHE_NEW_CC_UNIT) +   }, +   .update = upload_pipelined_state_pointers +}; + +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ +   upload_pipelined_state_pointers(brw); +   brw_upload_urb_fence(brw); +   brw_upload_constant_buffer_state(brw); +} + + +const struct brw_tracked_state brw_psp_urb_cbs = { +   .dirty = { +      .brw = BRW_NEW_URB_FENCE, +      .cache = (CACHE_NEW_VS_UNIT | +		CACHE_NEW_GS_UNIT | +		CACHE_NEW_GS_PROG | +		CACHE_NEW_CLIP_UNIT | +		CACHE_NEW_SF_UNIT | +		CACHE_NEW_WM_UNIT | +		CACHE_NEW_CC_UNIT) +   }, +   .update = upload_psp_urb_cbs +}; + +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. + */ +static void upload_depthbuffer(struct brw_context *brw) +{ +   struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf; + +   BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); +   OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); +   if (depth_surface == NULL) { +      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | +		(BRW_SURFACE_NULL << 29)); +      OUT_BATCH(0); +      OUT_BATCH(0); +      OUT_BATCH(0); +   } else { +      unsigned int format; +      struct brw_texture *tex = (struct brw_texture *)depth_surface->texture; +      assert(depth_surface->block.width == 1); +      assert(depth_surface->block.height == 1); +      switch (depth_surface->block.size) { +      case 2: +	 format = BRW_DEPTHFORMAT_D16_UNORM; +	 break; +      case 4: +	 if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT) +	    format = BRW_DEPTHFORMAT_D32_FLOAT; +	 else +	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; +	 break; +      default: +	 assert(0); +	 return; +      } + +      OUT_BATCH((depth_surface->stride - 1) | +		(format << 18) | +		(BRW_TILEWALK_YMAJOR << 26) | +//		(depth_surface->region->tiled << 27) | +		(BRW_SURFACE_2D << 29)); +      OUT_RELOC(tex->buffer, +		PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); +      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | +		((depth_surface->stride/depth_surface->block.size - 1) << 6) | +		((depth_surface->height - 1) << 19)); +      OUT_BATCH(0); +   } +   ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_depthbuffer = { +   .dirty = { +      .brw = BRW_NEW_SCENE, +      .cache = 0 +   }, +   .update = upload_depthbuffer, +}; + + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static void upload_polygon_stipple(struct brw_context *brw) +{ +   struct brw_polygon_stipple bps; +   unsigned i; + +   memset(&bps, 0, sizeof(bps)); +   bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; +   bps.header.length = sizeof(bps)/4-2; + +   /* XXX: state tracker should send *all* state down initially! 
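/* Illustrative sketch (not driver code): the depth-format choice made in
 * upload_depthbuffer() above, written as a standalone helper.  The enum
 * values are local placeholders standing in for the BRW_DEPTHFORMAT_*
 * constants in brw_defines.h.
 */
enum depth_fmt {
   DEPTH_D16_UNORM,          /* 2 bytes per pixel */
   DEPTH_D32_FLOAT,          /* 4 bytes, pure float Z */
   DEPTH_D24_UNORM_S8_UINT,  /* 4 bytes, packed depth + stencil */
   DEPTH_UNSUPPORTED
};

static enum depth_fmt pick_depth_format(unsigned block_size, int is_z32_float)
{
   switch (block_size) {
   case 2:
      return DEPTH_D16_UNORM;
   case 4:
      return is_z32_float ? DEPTH_D32_FLOAT : DEPTH_D24_UNORM_S8_UINT;
   default:
      return DEPTH_UNSUPPORTED;   /* the driver asserts here */
   }
}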
+    */ +   if (brw->attribs.PolygonStipple) +      for (i = 0; i < 32; i++) +	 bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */ + +   BRW_CACHED_BATCH_STRUCT(brw, &bps); +} + +const struct brw_tracked_state brw_polygon_stipple = { +   .dirty = { +      .brw = BRW_NEW_STIPPLE, +      .cache = 0 +   }, +   .update = upload_polygon_stipple +}; + + +/*********************************************************************** + * Line stipple packet + */ + +static void upload_line_stipple(struct brw_context *brw) +{ +   struct brw_line_stipple bls; +   float tmp; +   int tmpi; + +   memset(&bls, 0, sizeof(bls)); +   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; +   bls.header.length = sizeof(bls)/4 - 2; + +   bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern; +   bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor; + +   tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor; +   tmpi = tmp * (1<<13); + + +   bls.bits1.inverse_repeat_count = tmpi; + +   BRW_CACHED_BATCH_STRUCT(brw, &bls); +} + +const struct brw_tracked_state brw_line_stipple = { +   .dirty = { +      .brw = BRW_NEW_STIPPLE, +      .cache = 0 +   }, +   .update = upload_line_stipple +}; + + +/*********************************************************************** + * Misc constant state packets + */ + +static void upload_pipe_control(struct brw_context *brw) +{ +   struct brw_pipe_control pc; + +   return; + +   memset(&pc, 0, sizeof(pc)); + +   pc.header.opcode = CMD_PIPE_CONTROL; +   pc.header.length = sizeof(pc)/4 - 2; +   pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; + +   pc.header.instruction_state_cache_flush_enable = 1; + +   pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; + +   BRW_BATCH_STRUCT(brw, &pc); +} + +const struct brw_tracked_state brw_pipe_control = { +   .dirty = { +      .brw = BRW_NEW_SCENE, +      .cache = 0 +   }, +   .update = upload_pipe_control +}; + + +/*********************************************************************** + * Misc invarient state packets + */ + +static void upload_invarient_state( struct brw_context *brw ) +{ +   { +      struct brw_mi_flush flush; + +      memset(&flush, 0, sizeof(flush));       +      flush.opcode = CMD_MI_FLUSH; +      flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE; +      BRW_BATCH_STRUCT(brw, &flush); +   } + +   { +      /* 0x61040000  Pipeline Select */ +      /*     PipelineSelect            : 0 */ +      struct brw_pipeline_select ps; + +      memset(&ps, 0, sizeof(ps)); +      ps.header.opcode = CMD_PIPELINE_SELECT; +      ps.header.pipeline_select = 0; +      BRW_BATCH_STRUCT(brw, &ps); +   } + +   { +      struct brw_global_depth_offset_clamp gdo; +      memset(&gdo, 0, sizeof(gdo)); + +      /* Disable depth offset clamping. 
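/* Illustrative sketch (not driver code): the conversion upload_line_stipple()
 * performs for bits1.inverse_repeat_count above -- 1/factor expressed with
 * 13 fractional bits, matching the tmp * (1 << 13) computation.
 */
#include <stdio.h>

static unsigned inverse_repeat_count_fixed(unsigned factor)
{
   float inv = 1.0f / (float) factor;
   return (unsigned) (inv * (1 << 13));
}

int main(void)
{
   printf("factor 1 -> 0x%04x\n", inverse_repeat_count_fixed(1));  /* 0x2000 */
   printf("factor 2 -> 0x%04x\n", inverse_repeat_count_fixed(2));  /* 0x1000 */
   printf("factor 3 -> 0x%04x\n", inverse_repeat_count_fixed(3));  /* 0x0aaa */
   return 0;
}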
+       */ +      gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; +      gdo.header.length = sizeof(gdo)/4 - 2; +      gdo.depth_offset_clamp = 0.0; + +      BRW_BATCH_STRUCT(brw, &gdo); +   } + + +   /* 0x61020000  State Instruction Pointer */ +   { +      struct brw_system_instruction_pointer sip; +      memset(&sip, 0, sizeof(sip)); + +      sip.header.opcode = CMD_STATE_INSN_POINTER; +      sip.header.length = 0; +      sip.bits0.pad = 0; +      sip.bits0.system_instruction_pointer = 0; +      BRW_BATCH_STRUCT(brw, &sip); +   } + + +   { +      struct brw_vf_statistics vfs; +      memset(&vfs, 0, sizeof(vfs)); + +      vfs.opcode = CMD_VF_STATISTICS; +      if (BRW_DEBUG & DEBUG_STATS) +	 vfs.statistics_enable = 1; + +      BRW_BATCH_STRUCT(brw, &vfs); +   } + +    +   { +      struct brw_polygon_stipple_offset bpso; +       +      memset(&bpso, 0, sizeof(bpso)); +      bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; +      bpso.header.length = sizeof(bpso)/4-2;       +      bpso.bits0.x_offset = 0; +      bpso.bits0.y_offset = 0; + +      BRW_BATCH_STRUCT(brw, &bpso); +   } +} + +const struct brw_tracked_state brw_invarient_state = { +   .dirty = { +      .brw = BRW_NEW_SCENE, +      .cache = 0 +   }, +   .update = upload_invarient_state +}; + +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools.  This comes at the expense of memory, and more expensive cache + * misses. + */ +static void upload_state_base_address( struct brw_context *brw ) +{ +   /* Output the structure (brw_state_base_address) directly to the +    * batchbuffer, so we can emit relocations inline. +    */ +   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); +   OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); +   OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, +	     PIPE_BUFFER_USAGE_GPU_READ, +	     1); /* General state base address */ +   OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, +	     PIPE_BUFFER_USAGE_GPU_READ, +	     1); /* Surface state base address */ +   OUT_BATCH(1); /* Indirect object base address */ +   OUT_BATCH(1); /* General state upper bound */ +   OUT_BATCH(1); /* Indirect object upper bound */ +   ADVANCE_BATCH(); +} + + +const struct brw_tracked_state brw_state_base_address = { +   .dirty = { +      .brw = BRW_NEW_SCENE, +      .cache = 0 +   }, +   .update = upload_state_base_address +}; diff --git a/src/gallium/drivers/i965simple/brw_reg.h b/src/gallium/drivers/i965simple/brw_reg.h new file mode 100644 index 0000000000..9e885c3b3b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_reg.h @@ -0,0 +1,76 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#define CMD_MI				(0x0 << 29) +#define CMD_2D				(0x2 << 29) +#define CMD_3D				(0x3 << 29) + +#define MI_BATCH_BUFFER_END		(CMD_MI | 0xA << 23) + +/* Stalls command execution waiting for the given events to have occurred. */ +#define MI_WAIT_FOR_EVENT               (CMD_MI | (0x3 << 23)) +#define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2) + +/* Primitive dispatch on 830-945 */ +#define _3DPRIMITIVE			(CMD_3D | (0x1f << 24)) +#define PRIM_INDIRECT            (1<<23) +#define PRIM_INLINE              (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS       (1<<17) + +#define PRIM3D_TRILIST		(0x0<<18) +#define PRIM3D_TRISTRIP 	(0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18) +#define PRIM3D_TRIFAN		(0x3<<18) +#define PRIM3D_POLY		(0x4<<18) +#define PRIM3D_LINELIST 	(0x5<<18) +#define PRIM3D_LINESTRIP	(0x6<<18) +#define PRIM3D_RECTLIST 	(0x7<<18) +#define PRIM3D_POINTLIST	(0x8<<18) +#define PRIM3D_DIB		(0x9<<18) +#define PRIM3D_MASK		(0x1f<<18) + +#define XY_SETUP_BLT_CMD		(CMD_2D | (0x01 << 22) | 6) + +#define XY_COLOR_BLT_CMD		(CMD_2D | (0x50 << 22) | 4) + +#define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA	(1 << 21) +#define XY_BLT_WRITE_RGB	(1 << 20) +#define XY_SRC_TILED		(1 << 15) +#define XY_DST_TILED		(1 << 11) + +/* BR13 */ +#define BR13_565		(0x1 << 24) +#define BR13_8888		(0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c new file mode 100644 index 0000000000..b22e105f10 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -0,0 +1,246 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_string.h" +#include "util/u_simple_screen.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_tex_layout.h" + + +static const char * +brw_get_vendor( struct pipe_screen *screen ) +{ +   return "Tungsten Graphics, Inc."; +} + + +static const char * +brw_get_name( struct pipe_screen *screen ) +{ +   static char buffer[128]; +   const char *chipset; + +   switch (brw_screen(screen)->pci_id) { +   case PCI_CHIP_I965_Q: +      chipset = "Intel(R) 965Q"; +      break; +   case PCI_CHIP_I965_G: +   case PCI_CHIP_I965_G_1: +      chipset = "Intel(R) 965G"; +      break; +   case PCI_CHIP_I965_GM: +      chipset = "Intel(R) 965GM"; +      break; +   case PCI_CHIP_I965_GME: +      chipset = "Intel(R) 965GME/GLE"; +      break; +   default: +      chipset = "unknown"; +      break; +   } + +   util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); +   return buffer; +} + + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +      return 8; +   case PIPE_CAP_NPOT_TEXTURES: +      return 1; +   case PIPE_CAP_TWO_SIDED_STENCIL: +      return 1; +   case PIPE_CAP_GLSL: +      return 0; +   case PIPE_CAP_S3TC: +      return 0; +   case PIPE_CAP_ANISOTROPIC_FILTER: +      return 0; +   case PIPE_CAP_POINT_SPRITE: +      return 0; +   case PIPE_CAP_MAX_RENDER_TARGETS: +      return 1; +   case PIPE_CAP_OCCLUSION_QUERY: +      return 0; +   case PIPE_CAP_TEXTURE_SHADOW_MAP: +      return 1; +   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +      return 11; /* max 1024x1024 */ +   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +      return 8;  /* max 128x128x128 */ +   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +      return 11; /* max 1024x1024 */ +   default: +      return 0; +   } +} + + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_LINE_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_LINE_WIDTH_AA: +      return 7.5; + +   case PIPE_CAP_MAX_POINT_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_POINT_WIDTH_AA: +      return 255.0; + +   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +      return 4.0; + +   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +      return 16.0; + +   default: +      return 0; +   } +} + + +static boolean +brw_is_format_supported( struct pipe_screen *screen, +                         enum pipe_format format,  +                         enum pipe_texture_target target, +                         unsigned tex_usage,  +                         unsigned geom_flags ) +{ +#if 0 +   /* XXX: This is broken -- rewrite if still needed. 
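/* Illustrative sketch (not driver code): how the PIPE_CAP_MAX_TEXTURE_*_LEVELS
 * values returned above map to a maximum dimension -- 11 mipmap levels allow
 * 2^(11-1) = 1024 texels per side, 8 levels allow 128.
 */
#include <stdio.h>

static unsigned max_size_for_levels(unsigned levels)
{
   return 1u << (levels - 1);
}

int main(void)
{
   printf("11 levels -> %u\n", max_size_for_levels(11));  /* 1024 */
   printf(" 8 levels -> %u\n", max_size_for_levels(8));   /* 128  */
   return 0;
}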
*/ +   static const unsigned tex_supported[] = { +      PIPE_FORMAT_R8G8B8A8_UNORM, +      PIPE_FORMAT_A8R8G8B8_UNORM, +      PIPE_FORMAT_R5G6B5_UNORM, +      PIPE_FORMAT_L8_UNORM, +      PIPE_FORMAT_A8_UNORM, +      PIPE_FORMAT_I8_UNORM, +      PIPE_FORMAT_L8A8_UNORM, +      PIPE_FORMAT_YCBCR, +      PIPE_FORMAT_YCBCR_REV, +      PIPE_FORMAT_S8_Z24, +   }; + + +   /* Actually a lot more than this - add later: +    */ +   static const unsigned render_supported[] = { +      PIPE_FORMAT_A8R8G8B8_UNORM, +      PIPE_FORMAT_R5G6B5_UNORM, +   }; + +   /* +    */ +   static const unsigned z_stencil_supported[] = { +      PIPE_FORMAT_Z16_UNORM, +      PIPE_FORMAT_Z32_UNORM, +      PIPE_FORMAT_S8Z24_UNORM, +   }; + +   switch (type) { +   case PIPE_RENDER_FORMAT: +      *numFormats = Elements(render_supported); +      return render_supported; + +   case PIPE_TEX_FORMAT: +      *numFormats = Elements(tex_supported); +      return render_supported; + +   case PIPE_Z_STENCIL_FORMAT: +      *numFormats = Elements(render_supported); +      return render_supported; + +   default: +      *numFormats = 0; +      return NULL; +   } +#else +   switch (format) { +   case PIPE_FORMAT_A8R8G8B8_UNORM: +   case PIPE_FORMAT_R5G6B5_UNORM: +   case PIPE_FORMAT_S8Z24_UNORM: +      return TRUE; +   default: +      return FALSE; +   }; +   return FALSE; +#endif +} + + +static void +brw_destroy_screen( struct pipe_screen *screen ) +{ +   struct pipe_winsys *winsys = screen->winsys; + +   if(winsys->destroy) +      winsys->destroy(winsys); + +   FREE(screen); +} + + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ +   struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen); + +   if (!brwscreen) +      return NULL; + +   brwscreen->pci_id = pci_id; + +   brwscreen->screen.winsys = winsys; + +   brwscreen->screen.destroy = brw_destroy_screen; + +   brwscreen->screen.get_name = brw_get_name; +   brwscreen->screen.get_vendor = brw_get_vendor; +   brwscreen->screen.get_param = brw_get_param; +   brwscreen->screen.get_paramf = brw_get_paramf; +   brwscreen->screen.is_format_supported = brw_is_format_supported; + +   brw_init_screen_texture_funcs(&brwscreen->screen); +   u_simple_screen_init(&brwscreen->screen); + +   return &brwscreen->screen; +} diff --git a/src/gallium/drivers/i965simple/brw_screen.h b/src/gallium/drivers/i965simple/brw_screen.h new file mode 100644 index 0000000000..d3c70387e6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.h @@ -0,0 +1,68 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
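/* Hypothetical usage sketch (not in the tree): creating and querying the
 * screen built by brw_create_screen() above.  It assumes the driver and
 * Gallium headers are in scope; the winsys object would come from the
 * window-system layer, and error handling is omitted.
 */
static void example_query_screen(struct pipe_winsys *winsys)
{
   struct pipe_screen *screen = brw_create_screen(winsys, PCI_CHIP_I965_G);

   debug_printf("renderer: %s\n", screen->get_name(screen));
   debug_printf("max render targets: %d\n",
                screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS));   /* 1 */
   debug_printf("ARGB8888 supported: %d\n",
                screen->is_format_supported(screen, PIPE_FORMAT_A8R8G8B8_UNORM,
                                            PIPE_TEXTURE_2D, 0, 0));       /* TRUE */
   screen->destroy(screen);
}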
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct brw_screen +{ +   struct pipe_screen screen; + +   uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ +   return (struct brw_screen *) pscreen; +} + + +extern struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus +} +#endif + +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c new file mode 100644 index 0000000000..b82a2e143b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" +#include "brw_state.h" +#include "tgsi/tgsi_parse.h" + + +static void compile_sf_prog( struct brw_context *brw, +			     struct brw_sf_prog_key *key ) +{ +   struct brw_sf_compile c; +   const unsigned *program; +   unsigned program_size; + +   memset(&c, 0, sizeof(c)); + +   /* Begin the compilation: +    */ +   brw_init_compile(&c.func); + +   c.key = *key; + + +   c.nr_attrs = c.key.vp_output_count; +   c.nr_attr_regs = (c.nr_attrs+1)/2; + +   c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ +   c.nr_setup_regs = (c.nr_setup_attrs+1)/2; + +   c.prog_data.urb_read_length = c.nr_attr_regs; +   c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + +   /* Which primitive?  Or all three? +    */ +   switch (key->primitive) { +   case SF_TRIANGLES: +      c.nr_verts = 3; +      brw_emit_tri_setup( &c ); +      break; +   case SF_LINES: +      c.nr_verts = 2; +      brw_emit_line_setup( &c ); +      break; +   case SF_POINTS: +      c.nr_verts = 1; +      brw_emit_point_setup( &c ); +      break; + +   case SF_UNFILLED_TRIS: +   default: +      assert(0); +      return; +   } + + + +   /* get the program +    */ +   program = brw_get_program(&c.func, &program_size); + +   /* Upload +    */ +   brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], +					      &c.key, +					      sizeof(c.key), +					      program, +					      program_size, +					      &c.prog_data, +					      &brw->sf.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, +			       struct brw_sf_prog_key *key ) +{ +   return brw_search_cache(&brw->cache[BRW_SF_PROG], +			   key, sizeof(*key), +			   &brw->sf.prog_data, +			   &brw->sf.prog_gs_offset); +} + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_sf_prog( struct brw_context *brw ) +{ +   const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; +   struct brw_sf_prog_key key; +   struct tgsi_parse_context parse; +   int i, done = 0; + + +   memset(&key, 0, sizeof(key)); + +   /* Populate the key, noting state dependencies: +    */ +   /* CACHE_NEW_VS_PROG */ +   key.vp_output_count = brw->vs.prog_data->outputs_written; + +   /* BRW_NEW_FS */ +   key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1; + + +   /* BRW_NEW_REDUCED_PRIMITIVE */ +   switch (brw->reduced_primitive) { +   case PIPE_PRIM_TRIANGLES: +//      if (key.attrs & (1<<VERT_RESULT_EDGE)) +//	 key.primitive = SF_UNFILLED_TRIS; +//      else +      key.primitive = SF_TRIANGLES; +      break; +   case PIPE_PRIM_LINES: +      key.primitive = SF_LINES; +      break; +   case PIPE_PRIM_POINTS: +      key.primitive = SF_POINTS; +      break; +   } + + + +   /* Scan fp inputs to figure out what interpolation modes are +    * required for each incoming vp output.  There is an assumption +    * that the state tracker makes sure there is a 1:1 linkage between +    * these sets of attributes (XXX: position??) 
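/* Illustrative sketch (not driver code): the register-count arithmetic used
 * by compile_sf_prog() above.  Two float[4] attributes share one 256-bit GRF,
 * so counts are rounded up to whole registers; the * 2 mirrors
 * c.prog_data.urb_entry_size = c.nr_setup_regs * 2 in the code.
 */
static unsigned attr_regs(unsigned nr_attrs)
{
   return (nr_attrs + 1) / 2;             /* two vec4 attributes per register */
}

static unsigned sf_urb_entry_size(unsigned nr_setup_attrs)
{
   return attr_regs(nr_setup_attrs) * 2;  /* as in compile_sf_prog() above */
}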
+    */ +   tgsi_parse_init( &parse, fs->program.tokens ); +   while( !done && +	  !tgsi_parse_end_of_tokens( &parse ) )  +   { +      tgsi_parse_token( &parse ); + +      switch( parse.FullToken.Token.Type ) { +      case TGSI_TOKEN_TYPE_DECLARATION: +	 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)  +	 { +	    int first = parse.FullToken.FullDeclaration.DeclarationRange.First; +	    int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; +	    int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate; +	    //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; +	    //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + +	    debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); +	     +	    switch (interp_mode) { +	    case TGSI_INTERPOLATE_CONSTANT: +	       for (i = first; i <= last; i++)  +		  key.const_mask |= (1 << i); +	       break; +	    case TGSI_INTERPOLATE_LINEAR: +	       for (i = first; i <= last; i++)  +		  key.linear_mask |= (1 << i); +	       break; +	    case TGSI_INTERPOLATE_PERSPECTIVE: +	       for (i = first; i <= last; i++)  +		  key.persp_mask |= (1 << i); +	       break; +	    default: +	       break; +	    } + +	    /* Also need stuff for flat shading, twosided color. +	     */ + +	 } +	 break; +      default: +	 done = 1; +	 break; +      } +   } + +   /* Hack: Adjust for position.  Optimize away when not required (ie +    * for perspective interpolation). +    */ +   key.persp_mask <<= 1; +   key.linear_mask <<= 1;  +   key.linear_mask |= 1; +   key.const_mask <<= 1; + +   debug_printf("key.persp_mask: %x\n", key.persp_mask); +   debug_printf("key.linear_mask: %x\n", key.linear_mask); +   debug_printf("key.const_mask: %x\n", key.const_mask); + + +//   key.do_point_sprite = brw->attribs.Point->PointSprite; +//   key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + +//   key.do_flat_shading = (brw->attribs.Raster->flatshade); +//   key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + +//   if (key.do_twoside_color) +//      key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); + + +   if (!search_cache(brw, &key)) +      compile_sf_prog( brw, &key ); +} + + +const struct brw_tracked_state brw_sf_prog = { +   .dirty = { +      .brw   = (BRW_NEW_RASTERIZER | +		BRW_NEW_REDUCED_PRIMITIVE | +		BRW_NEW_VS | +		BRW_NEW_FS), +      .cache = 0, +   }, +   .update = upload_sf_prog +}; + + + +#if 0 +/* Build a struct like the one we'd like the state tracker to pass to + * us. 
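/* Illustrative sketch (not driver code): the mask adjustment done near the
 * end of upload_sf_prog() above.  Bit i marks setup attribute i; shifting
 * every mask left by one and forcing bit 0 of linear_mask reserves slot 0
 * for position, which is always linearly interpolated.
 */
#include <stdint.h>

struct interp_masks {
   uint32_t persp_mask;
   uint32_t linear_mask;
   uint32_t const_mask;
};

static void reserve_position_slot(struct interp_masks *m)
{
   m->persp_mask  <<= 1;
   m->linear_mask <<= 1;
   m->linear_mask  |= 1;    /* slot 0: position, linear */
   m->const_mask  <<= 1;
}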
+ */ +static void update_sf_linkage( struct brw_context *brw ) +{ +   const struct brw_vertex_program *vs = brw->attribs.VertexProgram; +   const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; +   struct pipe_setup_linkage state; +   struct tgsi_parse_context parse; + +   int i, j; +   int nr_vp_outputs = 0; +   int done = 0; + +   struct {  +      unsigned semantic:8; +      unsigned semantic_index:16; +   } fp_semantic[32], vp_semantic[32]; + +   memset(&state, 0, sizeof(state)); + +   state.fp_input_count = 0; + + + +    + + +   assert(state.fp_input_count == fs->program.num_inputs); + +       +   /* Then scan vp outputs +    */ +   done = 0; +   tgsi_parse_init( &parse, vs->program.tokens ); +   while( !done && +	  !tgsi_parse_end_of_tokens( &parse ) )  +   { +      tgsi_parse_token( &parse ); + +      switch( parse.FullToken.Token.Type ) { +      case TGSI_TOKEN_TYPE_DECLARATION: +	 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)  +	 { +	    int first = parse.FullToken.FullDeclaration.DeclarationRange.First; +	    int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; + +	    for (i = first; i < last; i++) { +	       vp_semantic[i].semantic =  +		  parse.FullToken.FullDeclaration.Semantic.SemanticName; +	       vp_semantic[i].semantic_index =  +		  parse.FullToken.FullDeclaration.Semantic.SemanticIndex; +	    } +	     +	    assert(last > nr_vp_outputs); +	    nr_vp_outputs = last; +	 } +	 break; +      default: +	 done = 1; +	 break; +      } +   } + + +   /* Now match based on semantic information. +    */ +   for (i = 0; i< state.fp_input_count; i++) { +      for (j = 0; j < nr_vp_outputs; j++) { +	 if (fp_semantic[i].semantic == vp_semantic[j].semantic && +	     fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { +	    state.fp_input[i].vp_output = j; +	 } +      } +      if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) { +	 for (j = 0; j < nr_vp_outputs; j++) { +	    if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic && +		fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) { +	       state.fp_input[i].bf_vp_output = j; +	    } +	 } +      } +   } + +   if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) { +      brw->sf.linkage = state; +      brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE; +   } +} + + +const struct brw_tracked_state brw_sf_linkage = { +   .dirty = { +      .brw   = (BRW_NEW_VS | +		BRW_NEW_FS), +      .cache = 0, +   }, +   .update = update_sf_linkage +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf.h b/src/gallium/drivers/i965simple/brw_sf.h new file mode 100644 index 0000000000..b7ada47560 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf.h @@ -0,0 +1,122 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#ifndef BRW_SF_H +#define BRW_SF_H + +#include "brw_context.h" +#include "brw_eu.h" + + +#define SF_POINTS    0 +#define SF_LINES     1 +#define SF_TRIANGLES 2 +#define SF_UNFILLED_TRIS   3 + + + +struct brw_sf_prog_key { +   unsigned vp_output_count:5; +   unsigned fp_input_count:5; + +   unsigned primitive:2; +   unsigned do_twoside_color:1; +   unsigned do_flat_shading:1; +   unsigned frontface_ccw:1; +   unsigned do_point_sprite:1; + +   /* Interpolation masks; +    */ +   unsigned linear_mask; +   unsigned persp_mask; +   unsigned const_mask; + + +//   int SpriteOrigin; +}; + +struct brw_sf_point_tex { +	boolean CoordReplace; +}; + +struct brw_sf_compile { +   struct brw_compile func; +   struct brw_sf_prog_key key; +   struct brw_sf_prog_data prog_data; + +   struct brw_reg pv; +   struct brw_reg det; +   struct brw_reg dx0; +   struct brw_reg dx2; +   struct brw_reg dy0; +   struct brw_reg dy2; + +   /* z and 1/w passed in seperately: +    */ +   struct brw_reg z[3]; +   struct brw_reg inv_w[3]; + +   /* The vertices: +    */ +   struct brw_reg vert[3]; + +    /* Temporaries, allocated after last vertex reg. +    */ +   struct brw_reg inv_det; +   struct brw_reg a1_sub_a0; +   struct brw_reg a2_sub_a0; +   struct brw_reg tmp; + +   struct brw_reg m1Cx; +   struct brw_reg m2Cy; +   struct brw_reg m3C0; + +   unsigned nr_verts; +   unsigned nr_attrs; +   unsigned nr_attr_regs; +   unsigned nr_setup_attrs; +   unsigned nr_setup_regs; +#if 0 +   ubyte attr_to_idx[VERT_RESULT_MAX]; +   ubyte idx_to_attr[VERT_RESULT_MAX]; +   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; +#endif +}; + + +void brw_emit_tri_setup( struct brw_sf_compile *c ); +void brw_emit_line_setup( struct brw_sf_compile *c ); +void brw_emit_point_setup( struct brw_sf_compile *c ); +void brw_emit_point_sprite_setup( struct brw_sf_compile *c ); +void brw_emit_anyprim_setup( struct brw_sf_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_sf_emit.c b/src/gallium/drivers/i965simple/brw_sf_emit.c new file mode 100644 index 0000000000..78d6fa5e9e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_emit.c @@ -0,0 +1,382 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" + + + +/*********************************************************************** + * Triangle setup. + */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ +   unsigned reg, i; + +   /* Values computed by fixed function unit: +    */ +   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); +   c->det = brw_vec1_grf(1, 2); +   c->dx0 = brw_vec1_grf(1, 3); +   c->dx2 = brw_vec1_grf(1, 4); +   c->dy0 = brw_vec1_grf(1, 5); +   c->dy2 = brw_vec1_grf(1, 6); + +   /* z and 1/w passed in seperately: +    */ +   c->z[0]     = brw_vec1_grf(2, 0); +   c->inv_w[0] = brw_vec1_grf(2, 1); +   c->z[1]     = brw_vec1_grf(2, 2); +   c->inv_w[1] = brw_vec1_grf(2, 3); +   c->z[2]     = brw_vec1_grf(2, 4); +   c->inv_w[2] = brw_vec1_grf(2, 5); + +   /* The vertices: +    */ +   reg = 3; +   for (i = 0; i < c->nr_verts; i++) { +      c->vert[i] = brw_vec8_grf(reg, 0); +      reg += c->nr_attr_regs; +   } + +   /* Temporaries, allocated after last vertex reg. 
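/* Illustrative sketch (not driver code): the GRF layout established by
 * alloc_regs() above -- r1 holds the fixed-function scalars (PV, det,
 * dx0/dx2/dy0/dy2), r2 holds z and 1/w for each vertex, vertices start at
 * r3 with nr_attr_regs registers each, and the temporaries follow the last
 * vertex.
 */
static unsigned first_temp_reg(unsigned nr_verts, unsigned nr_attr_regs)
{
   return 3 + nr_verts * nr_attr_regs;
}
/* e.g. a triangle with 4 attribute registers per vertex places c->inv_det
 * at r15, matching the "reg" counter in alloc_regs(). */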
+    */ +   c->inv_det = brw_vec1_grf(reg, 0);  reg++; +   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++; +   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++; +   c->tmp = brw_vec8_grf(reg, 0);  reg++; + +   /* Note grf allocation: +    */ +   c->prog_data.total_grf = reg; + + +   /* Outputs of this program - interpolation coefficients for +    * rasterization: +    */ +   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); +   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); +   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ +   struct brw_compile *p = &c->func; +   unsigned i; + +   brw_push_insn_state(p); + +   /* Copy both scalars with a single MOV: +    */ +   for (i = 0; i < c->nr_verts; i++) +      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + +   brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ +   brw_math(&c->func, +	    c->inv_det, +	    BRW_MATH_FUNCTION_INV, +	    BRW_MATH_SATURATE_NONE, +	    0, +	    c->det, +	    BRW_MATH_DATA_SCALAR, +	    BRW_MATH_PRECISION_FULL); + +} + +#define NON_PERPECTIVE_ATTRS  (FRAG_BIT_WPOS | \ +                               FRAG_BIT_COL0 | \ +			       FRAG_BIT_COL1) + +static boolean calculate_masks( struct brw_sf_compile *c, +				  unsigned reg, +				  ushort *pc, +				  ushort *pc_persp, +				  ushort *pc_linear) +{ +   boolean is_last_attr = (reg == c->nr_setup_regs - 1); +   unsigned persp_mask = c->key.persp_mask; +   unsigned linear_mask = c->key.linear_mask; + +   debug_printf("persp_mask: %x\n", persp_mask); +   debug_printf("linear_mask: %x\n", linear_mask); + +   *pc_persp = 0; +   *pc_linear = 0; +   *pc = 0xf; + +   if (persp_mask & (1 << (reg*2))) +      *pc_persp = 0xf; + +   if (linear_mask & (1 << (reg*2))) +      *pc_linear = 0xf; + +   /* Maybe only processs one attribute on the final round: +    */ +   if (reg*2+1 < c->nr_setup_attrs) { +      *pc |= 0xf0; + +      if (persp_mask & (1 << (reg*2+1))) +	 *pc_persp |= 0xf0; + +      if (linear_mask & (1 << (reg*2+1))) +	 *pc_linear |= 0xf0; +   } + +   debug_printf("pc: %x\n", *pc); +   debug_printf("pc_persp: %x\n", *pc_persp); +   debug_printf("pc_linear: %x\n", *pc_linear); +    + +   return is_last_attr; +} + + + +void brw_emit_tri_setup( struct brw_sf_compile *c ) +{ +   struct brw_compile *p = &c->func; +   unsigned i; + +   debug_printf("%s START ==============\n", __FUNCTION__); + +   c->nr_verts = 3; +   alloc_regs(c); +   invert_det(c); +   copy_z_inv_w(c); + + +   for (i = 0; i < c->nr_setup_regs; i++) +   { +      /* Pair of incoming attributes: +       */ +      struct brw_reg a0 = offset(c->vert[0], i); +      struct brw_reg a1 = offset(c->vert[1], i); +      struct brw_reg a2 = offset(c->vert[2], i); +      ushort pc = 0, pc_persp = 0, pc_linear = 0; +      boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + +      if (pc_persp) +      { +	 brw_set_predicate_control_flag_value(p, pc_persp); +	 brw_MUL(p, a0, a0, c->inv_w[0]); +	 brw_MUL(p, a1, a1, c->inv_w[1]); +	 brw_MUL(p, a2, a2, c->inv_w[2]); +      } + + +      /* Calculate coefficients for interpolated values: +       */ +      if (pc_linear) +      { +	 brw_set_predicate_control_flag_value(p, pc_linear); + +	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); +	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + +	 /* calculate dA/dx +	  */ +	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); +	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); +	 brw_MUL(p, c->m1Cx, c->tmp, 
c->inv_det); + +	 /* calculate dA/dy +	  */ +	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); +	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); +	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); +      } + +      { +	 brw_set_predicate_control_flag_value(p, pc); +	 /* start point for interpolation +	  */ +	 brw_MOV(p, c->m3C0, a0); + +	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in +	  * the send instruction: +	  */ +	 brw_urb_WRITE(p, +		       brw_null_reg(), +		       0, +		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ +		       0, 	/* allocate */ +		       1,	/* used */ +		       4, 	/* msg len */ +		       0,	/* response len */ +		       last,	/* eot */ +		       last, 	/* writes complete */ +		       i*4,	/* offset */ +		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ +      } +   } + +   debug_printf("%s DONE ==============\n", __FUNCTION__); + +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c ) +{ +   struct brw_compile *p = &c->func; +   unsigned i; + + +   c->nr_verts = 2; +   alloc_regs(c); +   invert_det(c); +   copy_z_inv_w(c); + +   for (i = 0; i < c->nr_setup_regs; i++) +   { +      /* Pair of incoming attributes: +       */ +      struct brw_reg a0 = offset(c->vert[0], i); +      struct brw_reg a1 = offset(c->vert[1], i); +      ushort pc, pc_persp, pc_linear; +      boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + +      if (pc_persp) +      { +	 brw_set_predicate_control_flag_value(p, pc_persp); +	 brw_MUL(p, a0, a0, c->inv_w[0]); +	 brw_MUL(p, a1, a1, c->inv_w[1]); +      } + +      /* Calculate coefficients for position, color: +       */ +      if (pc_linear) { +	 brw_set_predicate_control_flag_value(p, pc_linear); + +	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); +	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + +	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); +	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); +      } + +      { +	 brw_set_predicate_control_flag_value(p, pc); + +	 /* start point for interpolation +	  */ +	 brw_MOV(p, c->m3C0, a0); + +	 /* Copy m0..m3 to URB. +	  */ +	 brw_urb_WRITE(p, +		       brw_null_reg(), +		       0, +		       brw_vec8_grf(0, 0), +		       0, 	/* allocate */ +		       1, 	/* used */ +		       4, 	/* msg len */ +		       0,	/* response len */ +		       last, 	/* eot */ +		       last, 	/* writes complete */ +		       i*4,	/* urb destination offset */ +		       BRW_URB_SWIZZLE_TRANSPOSE); +      } +   } +} + + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) + */ +void brw_emit_point_setup( struct brw_sf_compile *c ) +{ +   struct brw_compile *p = &c->func; +   unsigned i; + +   c->nr_verts = 1; +   alloc_regs(c); +   copy_z_inv_w(c); + +   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ +   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + +   for (i = 0; i < c->nr_setup_regs; i++) +   { +      struct brw_reg a0 = offset(c->vert[0], i); +      ushort pc, pc_persp, pc_linear; +      boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + +      if (pc_persp) +      { +	 /* This seems odd as the values are all constant, but the +	  * fragment shader will be expecting it: +	  */ +	 brw_set_predicate_control_flag_value(p, pc_persp); +	 brw_MUL(p, a0, a0, c->inv_w[0]); +      } + + +      /* The delta values are always zero, just send the starting +       * coordinate.  
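/* Illustrative sketch (not driver code): the per-attribute plane setup the
 * triangle path above performs with vector instructions, written out for a
 * single float channel.  dx0/dy0/dx2/dy2 and det come from the
 * fixed-function payload; C0 is the attribute value at vertex 0.
 */
struct attr_plane {
   float Cx;   /* dA/dx */
   float Cy;   /* dA/dy */
   float C0;   /* value at the starting vertex */
};

static struct attr_plane setup_tri_plane(float a0, float a1, float a2,
                                         float dx0, float dy0,
                                         float dx2, float dy2,
                                         float det)
{
   struct attr_plane p;
   float inv_det = 1.0f / det;
   float d10 = a1 - a0;
   float d20 = a2 - a0;

   p.Cx = (d10 * dy2 - d20 * dy0) * inv_det;
   p.Cy = (d20 * dx0 - d10 * dx2) * inv_det;
   p.C0 = a0;
   return p;
}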
Again, this is to fit in with the interpolation +       * code in the fragment shader. +       */ +      { +	 brw_set_predicate_control_flag_value(p, pc); + +	 brw_MOV(p, c->m3C0, a0); /* constant value */ + +	 /* Copy m0..m3 to URB. +	  */ +	 brw_urb_WRITE(p, +		       brw_null_reg(), +		       0, +		       brw_vec8_grf(0, 0), +		       0, 	/* allocate */ +		       1,	/* used */ +		       4, 	/* msg len */ +		       0,	/* response len */ +		       last, 	/* eot */ +		       last, 	/* writes complete */ +		       i*4,	/* urb destination offset */ +		       BRW_URB_SWIZZLE_TRANSPOSE); +      } +   } +} diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c new file mode 100644 index 0000000000..2a5de61c21 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -0,0 +1,181 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +static void upload_sf_vp(struct brw_context *brw) +{ +   struct brw_sf_viewport sfv; + +   memset(&sfv, 0, sizeof(sfv)); + + +   /* BRW_NEW_VIEWPORT */ +   { +      const float *scale = brw->attribs.Viewport.scale; +      const float *trans = brw->attribs.Viewport.translate; + +      sfv.viewport.m00 = scale[0]; +      sfv.viewport.m11 = scale[1]; +      sfv.viewport.m22 = scale[2];  +      sfv.viewport.m30 = trans[0]; +      sfv.viewport.m31 = trans[1]; +      sfv.viewport.m32 = trans[2]; +   } + +   /* _NEW_SCISSOR */ +   sfv.scissor.xmin = brw->attribs.Scissor.minx; +   sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1; +   sfv.scissor.ymin = brw->attribs.Scissor.miny; +   sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1; + +   brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); +} + +const struct brw_tracked_state brw_sf_vp = { +   .dirty = { +      .brw   = (BRW_NEW_SCISSOR | +		BRW_NEW_VIEWPORT), +      .cache = 0 +   }, +   .update = upload_sf_vp +}; + +static void upload_sf_unit( struct brw_context *brw ) +{ +   struct brw_sf_unit_state sf; +   memset(&sf, 0, sizeof(sf)); + +   /* CACHE_NEW_SF_PROG */ +   sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; +   sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; +   sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + +   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; +   sf.thread3.dispatch_grf_start_reg = 3; +   sf.thread3.urb_entry_read_offset = 1; + +   /* BRW_NEW_URB_FENCE */ +   sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; +   sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; +   sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + +   if (BRW_DEBUG & DEBUG_SINGLE_THREAD) +      sf.thread4.max_threads = 0; + +   if (BRW_DEBUG & DEBUG_STATS) +      sf.thread4.stats_enable = 1; + +   /* CACHE_NEW_SF_VP */ +   sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; +   sf.sf5.viewport_transform = 1; + +   /* BRW_NEW_RASTER */ +   if (brw->attribs.Raster->scissor) +      sf.sf6.scissor = 1; + +#if 0 +   if (brw->attribs.Polygon->FrontFace == GL_CCW) +      sf.sf5.front_winding = BRW_FRONTWINDING_CCW; +   else +      sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + +   if (brw->attribs.Polygon->CullFlag) { +      switch (brw->attribs.Polygon->CullFaceMode) { +      case GL_FRONT: +	 sf.sf6.cull_mode = BRW_CULLMODE_FRONT; +	 break; +      case GL_BACK: +	 sf.sf6.cull_mode = BRW_CULLMODE_BACK; +	 break; +      case GL_FRONT_AND_BACK: +	 sf.sf6.cull_mode = BRW_CULLMODE_BOTH; +	 break; +      default: +	 assert(0); +	 break; +      } +   } +   else +      sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#else +   sf.sf5.front_winding = BRW_FRONTWINDING_CCW; +   sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#endif + +   sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); + +   sf.sf6.line_endcap_aa_region_width = 1; +   if (brw->attribs.Raster->line_smooth) +      sf.sf6.aa_enable = 1; +   else if (sf.sf6.line_width <= 0x2) +       sf.sf6.line_width = 0; + +   sf.sf6.point_rast_rule = 1;	/* opengl conventions */ + +   sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; +   sf.sf7.point_size = 
CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); +   sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex; + +   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: +    */ +   sf.sf7.trifan_pv = 2; +   sf.sf7.linestrip_pv = 1; +   sf.sf7.tristrip_pv = 2; +   sf.sf7.line_last_pixel_enable = 0; + +   /* Set bias for OpenGL rasterization rules: +    */ +   sf.sf6.dest_org_vbias = 0x8; +   sf.sf6.dest_org_hbias = 0x8; + +   brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); +} + + +const struct brw_tracked_state brw_sf_unit = { +   .dirty = { +      .brw   = (BRW_NEW_RASTERIZER | +		BRW_NEW_URB_FENCE), +      .cache = (CACHE_NEW_SF_VP | +		CACHE_NEW_SF_PROG) +   }, +   .update = upload_sf_unit +}; + + diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c new file mode 100644 index 0000000000..86d877d7ef --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -0,0 +1,48 @@ + +#include "brw_context.h" +#include "brw_state.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + + +/** + * XXX this obsolete new and no longer compiled. + */ +void brw_shader_info(const struct tgsi_token *tokens, +		     struct brw_shader_info *info ) +{ +   struct tgsi_parse_context parse; +   int done = 0; + +   tgsi_parse_init( &parse, tokens ); + +   while( !done && +	  !tgsi_parse_end_of_tokens( &parse ) )  +   { +      tgsi_parse_token( &parse ); + +      switch( parse.FullToken.Token.Type ) { +      case TGSI_TOKEN_TYPE_DECLARATION: +      { +	 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; +	 unsigned last = decl->DeclarationRange.Last; +       +	 // Broken by crazy wpos init: +	 //assert( info->nr_regs[decl->Declaration.File] <= last); + +	 info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], +						      last+1); +	 break; +      } +      case TGSI_TOKEN_TYPE_IMMEDIATE: +      case TGSI_TOKEN_TYPE_INSTRUCTION: +      default: +	 done = 1; +	 break; +      } +   } + +   tgsi_parse_free (&parse); +    +} diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c new file mode 100644 index 0000000000..b47f5373f3 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -0,0 +1,469 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
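Two details in upload_sf_unit() above are easy to misread. First, line width and point size are packed as unsigned fixed-point values; judging from the shifts, line width carries one fractional bit and point size three. Second, the point-size expression clamps Raster->line_width, which looks like a copy of the line-width line; presumably Raster->point_size was intended. A small sketch of the packing (helper names are made up; CLAMP is the util/u_math.h macro the file already uses):

   /* Illustrative helpers, not driver code: the SF unit's fixed-point fields
    * as packed by upload_sf_unit() above. */
   static unsigned
   pack_line_width_example(float width)   /* e.g. 2.5f -> 5  (one frac bit) */
   {
      return (unsigned) (CLAMP(width, 1.0, 5.0) * (1 << 1));
   }

   static unsigned
   pack_point_size_example(float size)    /* e.g. 4.0f -> 32 (three frac bits) */
   {
      return (unsigned) (CLAMP(size, 1.0, 255.0) * (1 << 3));
   }
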
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors:  Zack Rusin <zack@tungstengraphics.com> + *           Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL )                        \ +do {                                            \ +   struct TYPE *x = malloc(sizeof(*x));         \ +   memcpy(x, VAL, sizeof(*x) );                 \ +   return x;                                    \ +} while (0) + +/************************************************************************ + * Blend  + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, +                        const struct pipe_blend_state *blend) +{    +   DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, +                                 void *blend) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.Blend = (struct pipe_blend_state*)blend; +   brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ +   free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, +			     const struct pipe_blend_color *blend_color ) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.BlendColor = *blend_color; + +   brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler  + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, +                          const struct pipe_sampler_state *sampler) +{ +   DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_states(struct pipe_context *pipe, +                                    unsigned num, void **sampler) +{ +   struct brw_context *brw = brw_context(pipe); + +   assert(num <= PIPE_MAX_SAMPLERS); + +   /* Check for no-op */ +   if (num == brw->num_samplers && +       !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) +      return; + +   memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); +   memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * +          sizeof(void *)); + +   brw->num_samplers = num; + +   brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, +                                      void *sampler) +{ +   free(sampler); +} + + +/************************************************************************ + * Depth stencil  + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, +                           const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ +   DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, +                                         void *depth_stencil) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.DepthStencil = 
(const struct pipe_depth_stencil_alpha_state *)depth_stencil; + +   brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, +                                           void *depth_stencil) +{ +   free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, +                                 const struct pipe_scissor_state *scissor ) +{ +   struct brw_context *brw = brw_context(pipe); + +   memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); +   brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, +                                   const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, +                                   const struct pipe_shader_state *shader) +{ +   struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + +   brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); +   brw_fp->id = brw_context(pipe)->program_id++; + +   tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 +   brw_shader_info(shader->tokens, +		   &brw_fp->info2); +#endif + +   tgsi_dump(shader->tokens, 0); + + +   return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; +   brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ +   struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; + +   FREE((void *) brw_fp->program.tokens); +   FREE(brw_fp); +} + + +/************************************************************************ + * Vertex shader and other TNL state  + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, +                                 const struct pipe_shader_state *shader) +{ +   struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + +   brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); +   brw_vp->id = brw_context(pipe)->program_id++; + +   tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 +   brw_shader_info(shader->tokens, +		   &brw_vp->info2); +#endif +   tgsi_dump(shader->tokens, 0); + +   return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; +   brw->state.dirty.brw |= BRW_NEW_VS; + +   debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ +   struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; + +   FREE((void *) brw_vp->program.tokens); +   FREE(brw_vp); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, +                                const struct pipe_clip_state *clip ) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, +				     const struct pipe_viewport_state 
*viewport ) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.Viewport = *viewport; /* struct copy */ +   brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + +   /* pass the viewport info to the draw module */ +   //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffers(struct pipe_context *pipe, +				   unsigned count, +				   const struct pipe_vertex_buffer *buffers) +{ +   struct brw_context *brw = brw_context(pipe); +   memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); +} + +static void brw_set_vertex_elements(struct pipe_context *pipe, +                                    unsigned count, +                                    const struct pipe_vertex_element *elements) +{ +   /* flush ? */ +   struct brw_context *brw = brw_context(pipe); +   uint i; + +   assert(count <= PIPE_MAX_ATTRIBS); + +   for (i = 0; i < count; i++) { +      struct brw_vertex_element_state el; +      memset(&el, 0, sizeof(el)); + +      el.ve0.src_offset = elements[i].src_offset; +      el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); +      el.ve0.valid = 1; +      el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; + +      el.ve1.dst_offset   = i * 4; + +      el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; +      el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; +      el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; +      el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + +      switch (elements[i].nr_components) { +      case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; +      case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; +      case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; +         break; +      } + +      brw->vb.inputs[i] = el; +   } +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, +                                     uint shader, uint index, +                                     const struct pipe_constant_buffer *buf) +{ +   struct brw_context *brw = brw_context(pipe); + +   assert(buf == 0 || index == 0); + +   brw->attribs.Constants[shader] = buf; +   brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_textures(struct pipe_context *pipe, +                                     unsigned num, +                                     struct pipe_texture **texture) +{ +   struct brw_context *brw = brw_context(pipe); +   uint i; + +   assert(num <= PIPE_MAX_SAMPLERS); + +   /* Check for no-op */ +   if (num == brw->num_textures && +       !memcmp(brw->attribs.Texture, texture, num * +               sizeof(struct pipe_texture *))) +      return; + +   for (i = 0; i < num; i++) +      pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], +                             texture[i]); + +   for (i = num; i < brw->num_textures; i++) +      pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], +                             NULL); + +   brw->num_textures = num; + +   brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, +				       const struct pipe_framebuffer_state *fb) +{ +   struct brw_context *brw = brw_context(pipe); + +   
brw->attribs.FrameBuffer = *fb; /* struct copy */ + +   brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, +                             const struct pipe_rasterizer_state *rasterizer) +{ +   DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, +                                        void *setup ) +{ +   struct brw_context *brw = brw_context(pipe); + +   brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + +   /* Also pass-through to draw module: +    */ +   //draw_set_rasterizer_state(brw->draw, setup); + +   brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, +                                         void *setup) +{ +   free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ +   brw->pipe.create_blend_state = brw_create_blend_state; +   brw->pipe.bind_blend_state = brw_bind_blend_state; +   brw->pipe.delete_blend_state = brw_delete_blend_state; + +   brw->pipe.create_sampler_state = brw_create_sampler_state; +   brw->pipe.bind_sampler_states = brw_bind_sampler_states; +   brw->pipe.delete_sampler_state = brw_delete_sampler_state; + +   brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; +   brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; +   brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + +   brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; +   brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; +   brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; +   brw->pipe.create_fs_state = brw_create_fs_state; +   brw->pipe.bind_fs_state = brw_bind_fs_state; +   brw->pipe.delete_fs_state = brw_delete_fs_state; +   brw->pipe.create_vs_state = brw_create_vs_state; +   brw->pipe.bind_vs_state = brw_bind_vs_state; +   brw->pipe.delete_vs_state = brw_delete_vs_state; + +   brw->pipe.set_blend_color = brw_set_blend_color; +   brw->pipe.set_clip_state = brw_set_clip_state; +   brw->pipe.set_constant_buffer = brw_set_constant_buffer; +   brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +//   brw->pipe.set_feedback_state = brw_set_feedback_state; +//   brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + +   brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; +   brw->pipe.set_scissor_state = brw_set_scissor_state; +   brw->pipe.set_sampler_textures = brw_set_sampler_textures; +   brw->pipe.set_viewport_state = brw_set_viewport_state; +   brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; +   brw->pipe.set_vertex_elements = brw_set_vertex_elements; +} diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h new file mode 100644 index 0000000000..de0a6371b8 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.h @@ -0,0 +1,151 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
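One subtlety in brw_set_vertex_elements() above: the switch over nr_components relies on case fall-through, so every component the vertex buffer does not supply is overridden from STORE_SRC to a default, giving the usual (x, 0, 0, 1) / (x, y, 0, 1) / (x, y, z, 1) expansion. The same logic written out long-hand, as an illustration only (the helper is hypothetical):

   /* Equivalent of the fall-through switch above: components missing from
    * the source vertex element default to 0, 0, 1. */
   static void
   fill_missing_components_example(unsigned nr_components, float comp[4])
   {
      if (nr_components < 2) comp[1] = 0.0f;
      if (nr_components < 3) comp[2] = 0.0f;
      if (nr_components < 4) comp[3] = 1.0f;
      /* e.g. a two-component texcoord (s, t) is fetched as (s, t, 0, 1) */
   }
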
+  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +     + +#ifndef BRW_STATE_H +#define BRW_STATE_H + +#include "brw_context.h" +#include "brw_winsys.h" + + +const struct brw_tracked_state brw_blend_constant_color; +const struct brw_tracked_state brw_cc_unit; +const struct brw_tracked_state brw_cc_vp; +const struct brw_tracked_state brw_clip_prog; +const struct brw_tracked_state brw_clip_unit; +const struct brw_tracked_state brw_constant_buffer_state; +const struct brw_tracked_state brw_constant_buffer; +const struct brw_tracked_state brw_curbe_offsets; +const struct brw_tracked_state brw_invarient_state; +const struct brw_tracked_state brw_gs_prog; +const struct brw_tracked_state brw_gs_unit; +const struct brw_tracked_state brw_drawing_rect; +const struct brw_tracked_state brw_line_stipple; +const struct brw_tracked_state brw_pipelined_state_pointers; +const struct brw_tracked_state brw_binding_table_pointers; +const struct brw_tracked_state brw_depthbuffer; +const struct brw_tracked_state brw_polygon_stipple_offset; +const struct brw_tracked_state brw_polygon_stipple; +const struct brw_tracked_state brw_program_parameters; +const struct brw_tracked_state brw_recalculate_urb_fence; +const struct brw_tracked_state brw_sf_prog; +const struct brw_tracked_state brw_sf_unit; +const struct brw_tracked_state brw_sf_vp; +const struct brw_tracked_state brw_state_base_address; +const struct brw_tracked_state brw_urb_fence; +const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_prog; +const struct brw_tracked_state brw_vs_unit; +const struct brw_tracked_state brw_wm_prog; +const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_surfaces; +const struct brw_tracked_state brw_wm_unit; + +const struct brw_tracked_state brw_psp_urb_cbs; + +const struct brw_tracked_state brw_active_vertprog; +const struct brw_tracked_state brw_tnl_vertprog; +const struct brw_tracked_state brw_pipe_control; + +const struct brw_tracked_state brw_clear_surface_cache; +const struct brw_tracked_state brw_clear_batch_cache; + +/*********************************************************************** + * brw_state_cache.c + */ +unsigned brw_cache_data(struct brw_cache *cache, +		      const void *data ); + +unsigned brw_cache_data_sz(struct brw_cache *cache, +			 const void *data, 
+			 unsigned data_sz); + +unsigned brw_upload_cache( struct brw_cache *cache, +			 const void *key, +			 unsigned key_sz, +			 const void *data, +			 unsigned data_sz, +			 const void *aux, +			 void *aux_return ); + +boolean brw_search_cache( struct brw_cache *cache, +			    const void *key, +			    unsigned key_size, +			    void *aux_return, +			    unsigned *offset_return); + +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); + +static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw, +                                                          enum brw_cache_id id) +{ +   return brw->cache[id].pool->buffer; +} + +/*********************************************************************** + * brw_state_batch.c + */ +#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) + +boolean brw_cached_batch_struct( struct brw_context *brw, +				   const void *data, +				   unsigned sz ); + +void brw_destroy_batch_cache( struct brw_context *brw ); + + +/*********************************************************************** + * brw_state_pool.c + */ +void brw_init_pools( struct brw_context *brw ); +void brw_destroy_pools( struct brw_context *brw ); + +boolean brw_pool_alloc( struct brw_mem_pool *pool, +			  unsigned size, +			  unsigned alignment, +			  unsigned *offset_return); + +void brw_pool_fence( struct brw_context *brw, +		     struct brw_mem_pool *pool, +		     unsigned fence ); + + +void brw_pool_check_wrap( struct brw_context *brw, +			  struct brw_mem_pool *pool ); + +void brw_clear_all_caches( struct brw_context *brw ); +void brw_invalidate_pools( struct brw_context *brw ); +void brw_clear_batch_cache_flush( struct brw_context *brw ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c new file mode 100644 index 0000000000..43a1c89fc4 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -0,0 +1,113 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
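The BRW_CACHED_BATCH_STRUCT() macro declared in brw_state.h above is the entry point for the dedup logic defined in this file: a packet is only appended to the batchbuffer when its contents differ from the last copy emitted with the same opcode. A usage sketch with a packet type from brw_structs.h (the wrapper function is hypothetical and the opcode/length fill-in is elided):

   /* Usage sketch only: emitting a polygon-stipple packet through the
    * cached-batch path.  A second call with identical contents is dropped
    * (brw_cached_batch_struct() returns FALSE without touching the batch). */
   static void
   emit_stipple_example(struct brw_context *brw, const unsigned pattern[32])
   {
      struct brw_polygon_stipple bps;

      memset(&bps, 0, sizeof(bps));
      /* bps.header.opcode / .length would be filled in as the real atom does */
      memcpy(bps.stipple, pattern, sizeof(bps.stipple));

      BRW_CACHED_BATCH_STRUCT(brw, &bps);
   }
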
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_state.h" +#include "brw_winsys.h" + +#include "util/u_memory.h" + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. + */ +boolean brw_cached_batch_struct( struct brw_context *brw, +                                 const void *data, +                                 unsigned sz ) +{ +   struct brw_cached_batch_item *item = brw->cached_batch_items; +   struct header *newheader = (struct header *)data; + +   if (brw->emit_state_always) { +      brw_batchbuffer_data(brw->winsys, data, sz); +      return TRUE; +   } + +   while (item) { +      if (item->header->opcode == newheader->opcode) { +	 if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) +	    return FALSE; +	 if (item->sz != sz) { +	    FREE(item->header); +	    item->header = MALLOC(sz); +	    item->sz = sz; +	 } +	 goto emit; +      } +      item = item->next; +   } + +   assert(!item); +   item = CALLOC_STRUCT(brw_cached_batch_item); +   item->header = MALLOC(sz); +   item->sz = sz; +   item->next = brw->cached_batch_items; +   brw->cached_batch_items = item; + +emit: +   memcpy(item->header, newheader, sz); +   brw_batchbuffer_data(brw->winsys, data, sz); +   return TRUE; +} + +static void clear_batch_cache( struct brw_context *brw ) +{ +   struct brw_cached_batch_item *item = brw->cached_batch_items; + +   while (item) { +      struct brw_cached_batch_item *next = item->next; +      free((void *)item->header); +      free(item); +      item = next; +   } + +   brw->cached_batch_items = NULL; + + +   brw_clear_all_caches(brw); + +   brw_invalidate_pools(brw); +} + +void brw_clear_batch_cache_flush( struct brw_context *brw ) +{ +   clear_batch_cache(brw); + +/*    brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ + +   brw->state.dirty.brw |= ~0; +   brw->state.dirty.cache |= ~0; +} + + + +void brw_destroy_batch_cache( struct brw_context *brw ) +{ +   clear_batch_cache(brw); +} diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c new file mode 100644 index 0000000000..094248fa69 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_cache.c @@ -0,0 +1,443 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
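Note how clear_batch_cache() / brw_clear_batch_cache_flush() above end by setting every bit in brw->state.dirty: an atom is re-run whenever its dirty mask intersects the accumulated flags, so all-ones forces a full re-emit on the next validate. A sketch of that intersection test, which simply mirrors what brw_state_upload.c later implements as check_state() (the function name here is made up):

   /* Illustration: why "dirty |= ~0" above re-runs every tracked-state atom. */
   static boolean
   atom_needs_update_example(const struct brw_state_flags *dirty,
                             const struct brw_tracked_state *atom)
   {
      return (dirty->brw & atom->dirty.brw) ||
             (dirty->cache & atom->dirty.cache);
   }
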
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_state.h" + +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + +#include "util/u_memory.h" + + + +/*********************************************************************** + * Check cache for uploaded version of struct, else upload new one. + * Fail when memory is exhausted. + * + * XXX: FIXME: Currently search is so slow it would be quicker to + * regenerate the data every time... + */ + +static unsigned hash_key( const void *key, unsigned key_size ) +{ +   unsigned *ikey = (unsigned *)key; +   unsigned hash = 0, i; + +   assert(key_size % 4 == 0); + +   /* I'm sure this can be improved on: +    */ +   for (i = 0; i < key_size/4; i++) +      hash ^= ikey[i]; + +   return hash; +} + +static struct brw_cache_item *search_cache( struct brw_cache *cache, +					     unsigned hash, +					     const void *key, +					     unsigned key_size) +{ +   struct brw_cache_item *c; + +   for (c = cache->items[hash % cache->size]; c; c = c->next) { +      if (c->hash == hash && +	  c->key_size == key_size && +	  memcmp(c->key, key, key_size) == 0) +	 return c; +   } + +   return NULL; +} + + +static void rehash( struct brw_cache *cache ) +{ +   struct brw_cache_item **items; +   struct brw_cache_item *c, *next; +   unsigned size, i; + +   size = cache->size * 3; +   items = (struct brw_cache_item**) MALLOC(size * sizeof(*items)); +   memset(items, 0, size * sizeof(*items)); + +   for (i = 0; i < cache->size; i++) +      for (c = cache->items[i]; c; c = next) { +	 next = c->next; +	 c->next = items[c->hash % size]; +	 items[c->hash % size] = c; +      } + +   FREE(cache->items); +   cache->items = items; +   cache->size = size; +} + + +boolean brw_search_cache( struct brw_cache *cache, +			    const void *key, +			    unsigned key_size, +			    void *aux_return, +			    unsigned *offset_return) +{ +   struct brw_cache_item *item; +   unsigned addr = 0; +   unsigned hash = hash_key(key, key_size); + +   item = search_cache(cache, hash, key, key_size); + +   if (item) { +      if (aux_return) +	 *(void **)aux_return = (void *)((char *)item->key + item->key_size); + +      *offset_return = addr = item->offset; +   } + +   if (item == NULL || addr != cache->last_addr) { +      cache->brw->state.dirty.cache |= 1<<cache->id; +      cache->last_addr = addr; +   } + +   return item != NULL; +} + +unsigned brw_upload_cache( struct brw_cache *cache, +			 const void *key, +			 unsigned key_size, +			 const void *data, +			 unsigned data_size, +			 const void *aux, +			 void *aux_return ) +{ +   unsigned offset; +   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); +   unsigned hash = hash_key(key, key_size); +   void *tmp = MALLOC(key_size + cache->aux_size); + +   if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { +      /* Should not be possible: +       */ +      debug_printf("brw_pool_alloc failed\n"); +      exit(1); +   } + +   memcpy(tmp, key, key_size); + +   if (cache->aux_size) +      memcpy(tmp+key_size, aux, cache->aux_size); + +   item->key = tmp; +   item->hash = hash; +   
item->key_size = key_size; +   item->offset = offset; +   item->data_size = data_size; + +   if (++cache->n_items > cache->size * 1.5) +      rehash(cache); + +   hash %= cache->size; +   item->next = cache->items[hash]; +   cache->items[hash] = item; + +   if (aux_return) { +      assert(cache->aux_size); +      *(void **)aux_return = (void *)((char *)item->key + item->key_size); +   } + +   if (BRW_DEBUG & DEBUG_STATE) +      debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n", +             cache->name,  +	     data_size, +             (void*)cache->pool->buffer, +             offset); + +   /* Copy data to the buffer: +    */ +   cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys, +					    cache->pool->buffer,  +					    offset,  +					    data_size,  +					    data, +					    cache->id); + +   cache->brw->state.dirty.cache |= 1<<cache->id; +   cache->last_addr = offset; + +   return offset; +} + +/* This doesn't really work with aux data.  Use search/upload instead + */ +unsigned brw_cache_data_sz(struct brw_cache *cache, +			 const void *data, +			 unsigned data_size) +{ +   unsigned addr; + +   if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { +      addr = brw_upload_cache(cache, +			      data, data_size, +			      data, data_size, +			      NULL, NULL); +   } + +   return addr; +} + +unsigned brw_cache_data(struct brw_cache *cache, +		      const void *data) +{ +   return brw_cache_data_sz(cache, data, cache->key_size); +} + +enum pool_type { +   DW_SURFACE_STATE, +   DW_GENERAL_STATE +}; + +static void brw_init_cache( struct brw_context *brw, +			    const char *name, +			    unsigned id, +			    unsigned key_size, +			    unsigned aux_size, +			    enum pool_type pool_type) +{ +   struct brw_cache *cache = &brw->cache[id]; +   cache->brw = brw; +   cache->id = id; +   cache->name = name; +   cache->items = NULL; + +   cache->size = 7; +   cache->n_items = 0; +   cache->items = (struct brw_cache_item **) +      CALLOC(cache->size, sizeof(struct brw_cache_item)); + + +   cache->key_size = key_size; +   cache->aux_size = aux_size; +   switch (pool_type) { +   case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; +   case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; +   default: assert(0); break; +   } +} + +void brw_init_caches( struct brw_context *brw ) +{ + +   brw_init_cache(brw, +		  "CC_VP", +		  BRW_CC_VP, +		  sizeof(struct brw_cc_viewport), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "CC_UNIT", +		  BRW_CC_UNIT, +		  sizeof(struct brw_cc_unit_state), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "WM_PROG", +		  BRW_WM_PROG, +		  sizeof(struct brw_wm_prog_key), +		  sizeof(struct brw_wm_prog_data), +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "SAMPLER_DEFAULT_COLOR", +		  BRW_SAMPLER_DEFAULT_COLOR, +		  sizeof(struct brw_sampler_default_color), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "SAMPLER", +		  BRW_SAMPLER, +		  0,		/* variable key/data size */ +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "WM_UNIT", +		  BRW_WM_UNIT, +		  sizeof(struct brw_wm_unit_state), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "SF_PROG", +		  BRW_SF_PROG, +		  sizeof(struct brw_sf_prog_key), +		  sizeof(struct brw_sf_prog_data), +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "SF_VP", +		  BRW_SF_VP, +		  sizeof(struct brw_sf_viewport), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "SF_UNIT", +		  
BRW_SF_UNIT, +		  sizeof(struct brw_sf_unit_state), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "VS_UNIT", +		  BRW_VS_UNIT, +		  sizeof(struct brw_vs_unit_state), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "VS_PROG", +		  BRW_VS_PROG, +		  sizeof(struct brw_vs_prog_key), +		  sizeof(struct brw_vs_prog_data), +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "CLIP_UNIT", +		  BRW_CLIP_UNIT, +		  sizeof(struct brw_clip_unit_state), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "CLIP_PROG", +		  BRW_CLIP_PROG, +		  sizeof(struct brw_clip_prog_key), +		  sizeof(struct brw_clip_prog_data), +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "GS_UNIT", +		  BRW_GS_UNIT, +		  sizeof(struct brw_gs_unit_state), +		  0, +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "GS_PROG", +		  BRW_GS_PROG, +		  sizeof(struct brw_gs_prog_key), +		  sizeof(struct brw_gs_prog_data), +		  DW_GENERAL_STATE); + +   brw_init_cache(brw, +		  "SS_SURFACE", +		  BRW_SS_SURFACE, +		  sizeof(struct brw_surface_state), +		  0, +		  DW_SURFACE_STATE); + +   brw_init_cache(brw, +		  "SS_SURF_BIND", +		  BRW_SS_SURF_BIND, +		  sizeof(struct brw_surface_binding_table), +		  0, +		  DW_SURFACE_STATE); +} + + +/* When we lose hardware context, need to invalidate the surface cache + * as these structs must be explicitly re-uploaded.  They are subject + * to fixup by the memory manager as they contain absolute agp + * offsets, so we need to ensure there is a fresh version of the + * struct available to receive the fixup. + * + * XXX: Need to ensure that there aren't two versions of a surface or + * bufferobj with different backing data active in the same buffer at + * once?  Otherwise the cache could confuse them.  Maybe better not to + * cache at all? + * + * --> Isn't this the same as saying need to ensure batch is flushed + *         before new data is uploaded to an existing buffer?  We + *         already try to make sure of that. + */ +static void clear_cache( struct brw_cache *cache ) +{ +   struct brw_cache_item *c, *next; +   unsigned i; + +   for (i = 0; i < cache->size; i++) { +      for (c = cache->items[i]; c; c = next) { +	 next = c->next; +	 free((void *)c->key); +	 free(c); +      } +      cache->items[i] = NULL; +   } + +   cache->n_items = 0; +} + +void brw_clear_all_caches( struct brw_context *brw ) +{ +   int i; + +   if (BRW_DEBUG & DEBUG_STATE) +      debug_printf("%s\n", __FUNCTION__); + +   for (i = 0; i < BRW_MAX_CACHE; i++) +      clear_cache(&brw->cache[i]); + +   if (brw->curbe.last_buf) { +      FREE(brw->curbe.last_buf); +      brw->curbe.last_buf = NULL; +   } + +   brw->state.dirty.brw |= ~0; +   brw->state.dirty.cache |= ~0; +} + + + + + +void brw_destroy_caches( struct brw_context *brw ) +{ +   unsigned i; + +   for (i = 0; i < BRW_MAX_CACHE; i++) +      clear_cache(&brw->cache[i]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c new file mode 100644 index 0000000000..e91263cb1f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -0,0 +1,138 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
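The cache above is used in two ways. Plain unit state (e.g. upload_sf_unit() earlier) goes through brw_cache_data(), where the packed struct serves as both key and data, so unchanged state re-binds to the offset already uploaded to the pool. The program caches additionally carry aux data (the *_prog_data structs), and their callers probe with brw_search_cache() before compiling. A sketch of that second pattern, roughly how the *_prog atoms use it (the wrapper and its parameters are hypothetical):

   /* Sketch: look up a compiled kernel by key, compile + upload only on a miss.
    * "code"/"prog_data" stand in for the compiler's output. */
   static unsigned
   find_or_upload_example(struct brw_cache *cache,
                          const void *key, unsigned key_size,
                          const void *code, unsigned code_size,
                          const void *prog_data, void **prog_data_out)
   {
      unsigned offset;

      if (brw_search_cache(cache, key, key_size, prog_data_out, &offset))
         return offset;                  /* hit: reuse the uploaded kernel */

      return brw_upload_cache(cache, key, key_size,
                              code, code_size,
                              prog_data, prog_data_out);
   }
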
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +/** @file brw_state_pool.c + * Implements the state pool allocator. + * + * For the 965, we create two state pools for state cache entries.  Objects + * will be allocated into the pools depending on which state base address + * their pointer is relative to in other 965 state. + * + * The state pools are relatively simple: As objects are allocated, increment + * the offset to allocate space.  When the pool is "full" (rather, close to + * full), we reset the pool and reset the state cache entries that point into + * the pool. 
+ */ + +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "brw_context.h" +#include "brw_state.h" + +boolean brw_pool_alloc( struct brw_mem_pool *pool, +			  unsigned size, +			  unsigned alignment, +			  unsigned *offset_return) +{ +   unsigned fixup = align(pool->offset, alignment) - pool->offset; + +   size = align(size, 4); + +   if (pool->offset + fixup + size >= pool->size) { +      debug_printf("%s failed\n", __FUNCTION__); +      assert(0); +      exit(0); +   } + +   pool->offset += fixup; +   *offset_return = pool->offset; +   pool->offset += size; + +   return TRUE; +} + +static +void brw_invalidate_pool( struct brw_mem_pool *pool ) +{ +   if (BRW_DEBUG & DEBUG_STATE) +      debug_printf("\n\n\n %s \n\n\n", __FUNCTION__); + +   pool->offset = 0; + +   brw_clear_all_caches(pool->brw); +} + + +static void brw_init_pool( struct brw_context *brw, +			   unsigned pool_id, +			   unsigned size ) +{ +   struct brw_mem_pool *pool = &brw->pool[pool_id]; + +   pool->size = size; +   pool->brw = brw; + +   pool->buffer = pipe_buffer_create(brw->pipe.screen, +                                     4096, +                                     0 /*  DRM_BO_FLAG_MEM_TT */, +                                     size); +} + +static void brw_destroy_pool( struct brw_context *brw, +			      unsigned pool_id ) +{ +   struct brw_mem_pool *pool = &brw->pool[pool_id]; + +   pipe_buffer_reference( pool->brw->pipe.screen, +			  &pool->buffer, +			  NULL ); +} + + +void brw_pool_check_wrap( struct brw_context *brw, +			  struct brw_mem_pool *pool ) +{ +   if (pool->offset > (pool->size * 3) / 4) { +      brw->state.dirty.brw |= BRW_NEW_SCENE; +   } + +} + +void brw_init_pools( struct brw_context *brw ) +{ +   brw_init_pool(brw, BRW_GS_POOL, 0x80000); +   brw_init_pool(brw, BRW_SS_POOL, 0x80000); +} + +void brw_destroy_pools( struct brw_context *brw ) +{ +   brw_destroy_pool(brw, BRW_GS_POOL); +   brw_destroy_pool(brw, BRW_SS_POOL); +} + + +void brw_invalidate_pools( struct brw_context *brw ) +{ +   brw_invalidate_pool(&brw->pool[BRW_GS_POOL]); +   brw_invalidate_pool(&brw->pool[BRW_SS_POOL]); +} diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c new file mode 100644 index 0000000000..bac9161b5f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -0,0 +1,202 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_state.h" + +#include "util/u_memory.h" + +/* This is used to initialize brw->state.atoms[].  We could use this + * list directly except for a single atom, brw_constant_buffer, which + * has a .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. + */ +const struct brw_tracked_state *atoms[] = +{ +   &brw_vs_prog, +   &brw_gs_prog, +   &brw_clip_prog, +   &brw_sf_prog, +   &brw_wm_prog, + +   /* Once all the programs are done, we know how large urb entry +    * sizes need to be and can decide if we need to change the urb +    * layout. +    */ +   &brw_curbe_offsets, +   &brw_recalculate_urb_fence, + + +   &brw_cc_vp, +   &brw_cc_unit, + +   &brw_wm_surfaces,		/* must do before samplers */ +   &brw_wm_samplers, + +   &brw_wm_unit, +   &brw_sf_vp, +   &brw_sf_unit, +   &brw_vs_unit,		/* always required, enabled or not */ +   &brw_clip_unit, +   &brw_gs_unit, + +   /* Command packets: +    */ +   &brw_invarient_state, +   &brw_state_base_address, +   &brw_pipe_control, + +   &brw_binding_table_pointers, +   &brw_blend_constant_color, + +   &brw_drawing_rect, +   &brw_depthbuffer, + +   &brw_polygon_stipple, +   &brw_line_stipple, + +   &brw_psp_urb_cbs, + +   &brw_constant_buffer +}; + + +void brw_init_state( struct brw_context *brw ) +{ +   brw_init_pools(brw); +   brw_init_caches(brw); + +   brw->state.dirty.brw = ~0; +   brw->emit_state_always = 0; +} + + +void brw_destroy_state( struct brw_context *brw ) +{ +   brw_destroy_caches(brw); +   brw_destroy_batch_cache(brw); +   brw_destroy_pools(brw); +} + +/*********************************************************************** + */ + +static boolean check_state( const struct brw_state_flags *a, +			      const struct brw_state_flags *b ) +{ +   return ((a->brw & b->brw) || +	   (a->cache & b->cache)); +} + +static void accumulate_state( struct brw_state_flags *a, +			      const struct brw_state_flags *b ) +{ +   a->brw |= b->brw; +   a->cache |= b->cache; +} + + +static void xor_states( struct brw_state_flags *result, +			     const struct brw_state_flags *a, +			      const struct brw_state_flags *b ) +{ +   result->brw = a->brw ^ b->brw; +   result->cache = a->cache ^ b->cache; +} + + +/*********************************************************************** + * Emit all state: + */ +void brw_validate_state( struct brw_context *brw ) +{ +   struct brw_state_flags *state = &brw->state.dirty; +   unsigned i; + +   if (brw->emit_state_always)  +      state->brw |= ~0; + +   if (state->cache == 0 && +       state->brw == 0) +      return; + +   if (brw->state.dirty.brw & BRW_NEW_SCENE) +      brw_clear_batch_cache_flush(brw); + +   if (BRW_DEBUG) { +      /* Debug version which enforces various sanity checks on the +       * state flags which are generated and checked to help ensure +       * state atoms are ordered correctly in the list. 
+       */ +      struct brw_state_flags examined, prev; +      memset(&examined, 0, sizeof(examined)); +      prev = *state; + +      for (i = 0; i < Elements(atoms); i++) { +	 const struct brw_tracked_state *atom = atoms[i]; +	 struct brw_state_flags generated; + +	 assert(atom->dirty.brw || +		atom->dirty.cache); +	 assert(atom->update); + +	 if (check_state(state, &atom->dirty)) { +	    atom->update( brw ); +	 } + +	 accumulate_state(&examined, &atom->dirty); + +	 /* generated = (prev ^ state) +	  * if (examined & generated) +	  *     fail; +	  */ +	 xor_states(&generated, &prev, state); +	 assert(!check_state(&examined, &generated)); +	 prev = *state; +      } +   } +   else { +      for (i = 0; i < Elements(atoms); i++) { +	 const struct brw_tracked_state *atom = atoms[i]; + +	 assert(atom->dirty.brw || +		atom->dirty.cache); +	 assert(atom->update); + +	 if (check_state(state, &atom->dirty)) +	    atom->update( brw ); +      } +   } + +   memset(state, 0, sizeof(*state)); +} diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h new file mode 100644 index 0000000000..bbb087e95d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_structs.h @@ -0,0 +1,1348 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
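The debug path of brw_validate_state() above enforces an ordering invariant on the atom list: after each update() it XORs the previous and current dirty flags to find the bits that update just generated, and asserts that none of them were already examined by an earlier atom, since that atom would then have run against stale state. A worked illustration with two flags from this driver (the function and the flag values chosen are hypothetical; boolean/TRUE come from p_compiler.h as elsewhere):

   /* Worked example of the ordering assertion above: an earlier atom consumed
    * BRW_NEW_URB_FENCE, then a later atom's update() set it again. */
   static boolean
   atoms_misordered_example(void)
   {
      unsigned examined  = BRW_NEW_URB_FENCE;                   /* already consumed    */
      unsigned prev      = BRW_NEW_SCISSOR;                     /* flags before update */
      unsigned state     = BRW_NEW_SCISSOR | BRW_NEW_URB_FENCE; /* flags after update  */
      unsigned generated = prev ^ state;                        /* == BRW_NEW_URB_FENCE */

      return (examined & generated) != 0;   /* TRUE -> the assert above would fire */
   }
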
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include "pipe/p_compiler.h" + +/* Command packets: + */ +struct header +{ +   unsigned length:16; +   unsigned opcode:16; +}; + + +union header_union +{ +   struct header bits; +   unsigned dword; +}; + +struct brw_3d_control +{ +   struct +   { +      unsigned length:8; +      unsigned notify_enable:1; +      unsigned pad:3; +      unsigned wc_flush_enable:1; +      unsigned depth_stall_enable:1; +      unsigned operation:2; +      unsigned opcode:16; +   } header; + +   struct +   { +      unsigned pad:2; +      unsigned dest_addr_type:1; +      unsigned dest_addr:29; +   } dest; + +   unsigned dword2; +   unsigned dword3; +}; + + +struct brw_3d_primitive +{ +   struct +   { +      unsigned length:8; +      unsigned pad:2; +      unsigned topology:5; +      unsigned indexed:1; +      unsigned opcode:16; +   } header; + +   unsigned verts_per_instance; +   unsigned start_vert_location; +   unsigned instance_count; +   unsigned start_instance_location; +   unsigned base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE           0x1 +#define BRW_FLUSH_STATE_CACHE          0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS    0x8 + +struct brw_mi_flush +{ +   unsigned flags:4; +   unsigned pad:12; +   unsigned opcode:16; +}; + +struct brw_vf_statistics +{ +   unsigned statistics_enable:1; +   unsigned pad:15; +   unsigned opcode:16; +}; + + + +struct brw_binding_table_pointers +{ +   struct header header; +   unsigned vs; +   unsigned gs; +   unsigned clp; +   unsigned sf; +   unsigned wm; +}; + + +struct brw_blend_constant_color +{ +   struct header header; +   float blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ +   union header_union header; + +   union { +      struct { +	 unsigned pitch:18; +	 unsigned format:3; +	 unsigned pad:4; +	 unsigned depth_offset_disable:1; +	 unsigned tile_walk:1; +	 unsigned tiled_surface:1; +	 unsigned pad2:1; +	 unsigned surface_type:3; +      } bits; +      unsigned dword; +   } dword1; + +   unsigned dword2_base_addr; + +   union { +      struct { +	 unsigned pad:1; +	 unsigned mipmap_layout:1; +	 unsigned lod:4; +	 unsigned width:13; +	 unsigned height:13; +      } bits; +      unsigned dword; +   } dword3; + +   union { +      struct { +	 unsigned pad:12; +	 unsigned min_array_element:9; +	 unsigned depth:11; +      } bits; +      unsigned dword; +   } dword4; +}; + +struct brw_drawrect +{ +   struct header header; +   unsigned xmin:16; +   unsigned ymin:16; +   unsigned xmax:16; +   unsigned ymax:16; +   unsigned xorg:16; +   unsigned yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ +   struct header header; +   float depth_offset_clamp; +}; + +struct brw_indexbuffer +{ +   union { +      struct +      { +	 unsigned length:8; +	 unsigned index_format:2; +	 unsigned cut_index_enable:1; +	 unsigned pad:5; +	 unsigned opcode:16; +      } bits; +      unsigned dword; + +   } header; + +   unsigned buffer_start; +   unsigned buffer_end; +}; + + +struct brw_line_stipple +{ +   struct header header; + +   struct +   { +      unsigned pattern:16; +      unsigned pad:16; +   } bits0; + +   struct +   { +      unsigned repeat_count:9; +      unsigned pad:7; +      unsigned 
inverse_repeat_count:16; +   } bits1; +}; + + +struct brw_pipelined_state_pointers +{ +   struct header header; + +   struct { +      unsigned pad:5; +      unsigned offset:27; +   } vs; + +   struct +   { +      unsigned enable:1; +      unsigned pad:4; +      unsigned offset:27; +   } gs; + +   struct +   { +      unsigned enable:1; +      unsigned pad:4; +      unsigned offset:27; +   } clp; + +   struct +   { +      unsigned pad:5; +      unsigned offset:27; +   } sf; + +   struct +   { +      unsigned pad:5; +      unsigned offset:27; +   } wm; + +   struct +   { +      unsigned pad:5; +      unsigned offset:27; /* KW: check me! */ +   } cc; +}; + + +struct brw_polygon_stipple_offset +{ +   struct header header; + +   struct { +      unsigned y_offset:5; +      unsigned pad:3; +      unsigned x_offset:5; +      unsigned pad0:19; +   } bits0; +}; + + + +struct brw_polygon_stipple +{ +   struct header header; +   unsigned stipple[32]; +}; + + + +struct brw_pipeline_select +{ +   struct +   { +      unsigned pipeline_select:1; +      unsigned pad:15; +      unsigned opcode:16; +   } header; +}; + + +struct brw_pipe_control +{ +   struct +   { +      unsigned length:8; +      unsigned notify_enable:1; +      unsigned pad:2; +      unsigned instruction_state_cache_flush_enable:1; +      unsigned write_cache_flush_enable:1; +      unsigned depth_stall_enable:1; +      unsigned post_sync_operation:2; + +      unsigned opcode:16; +   } header; + +   struct +   { +      unsigned pad:2; +      unsigned dest_addr_type:1; +      unsigned dest_addr:29; +   } bits1; + +   unsigned data0; +   unsigned data1; +}; + + +struct brw_urb_fence +{ +   struct +   { +      unsigned length:8; +      unsigned vs_realloc:1; +      unsigned gs_realloc:1; +      unsigned clp_realloc:1; +      unsigned sf_realloc:1; +      unsigned vfe_realloc:1; +      unsigned cs_realloc:1; +      unsigned pad:2; +      unsigned opcode:16; +   } header; + +   struct +   { +      unsigned vs_fence:10; +      unsigned gs_fence:10; +      unsigned clp_fence:10; +      unsigned pad:2; +   } bits0; + +   struct +   { +      unsigned sf_fence:10; +      unsigned vf_fence:10; +      unsigned cs_fence:10; +      unsigned pad:2; +   } bits1; +}; + +struct brw_constant_buffer_state /* previously brw_command_streamer */ +{ +   struct header header; + +   struct +   { +      unsigned nr_urb_entries:3; +      unsigned pad:1; +      unsigned urb_entry_size:5; +      unsigned pad0:23; +   } bits0; +}; + +struct brw_constant_buffer +{ +   struct +   { +      unsigned length:8; +      unsigned valid:1; +      unsigned pad:7; +      unsigned opcode:16; +   } header; + +   struct +   { +      unsigned buffer_length:6; +      unsigned buffer_address:26; +   } bits0; +}; + +struct brw_state_base_address +{ +   struct header header; + +   struct +   { +      unsigned modify_enable:1; +      unsigned pad:4; +      unsigned general_state_address:27; +   } bits0; + +   struct +   { +      unsigned modify_enable:1; +      unsigned pad:4; +      unsigned surface_state_address:27; +   } bits1; + +   struct +   { +      unsigned modify_enable:1; +      unsigned pad:4; +      unsigned indirect_object_state_address:27; +   } bits2; + +   struct +   { +      unsigned modify_enable:1; +      unsigned pad:11; +      unsigned general_state_upper_bound:20; +   } bits3; + +   struct +   { +      unsigned modify_enable:1; +      unsigned pad:11; +      unsigned indirect_object_state_upper_bound:20; +   } bits4; +}; + +struct brw_state_prefetch +{ +   struct header 
header; + +   struct +   { +      unsigned prefetch_count:3; +      unsigned pad:3; +      unsigned prefetch_pointer:26; +   } bits0; +}; + +struct brw_system_instruction_pointer +{ +   struct header header; + +   struct +   { +      unsigned pad:4; +      unsigned system_instruction_pointer:28; +   } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ +   unsigned pad0:1; +   unsigned grf_reg_count:3; +   unsigned pad1:2; +   unsigned kernel_start_pointer:26; +}; + +struct thread1 +{ +   unsigned ext_halt_exception_enable:1; +   unsigned sw_exception_enable:1; +   unsigned mask_stack_exception_enable:1; +   unsigned timeout_exception_enable:1; +   unsigned illegal_op_exception_enable:1; +   unsigned pad0:3; +   unsigned depth_coef_urb_read_offset:6;	/* WM only */ +   unsigned pad1:2; +   unsigned floating_point_mode:1; +   unsigned thread_priority:1; +   unsigned binding_table_entry_count:8; +   unsigned pad3:5; +   unsigned single_program_flow:1; +}; + +struct thread2 +{ +   unsigned per_thread_scratch_space:4; +   unsigned pad0:6; +   unsigned scratch_space_base_pointer:22; +}; + + +struct thread3 +{ +   unsigned dispatch_grf_start_reg:4; +   unsigned urb_entry_read_offset:6; +   unsigned pad0:1; +   unsigned urb_entry_read_length:6; +   unsigned pad1:1; +   unsigned const_urb_entry_read_offset:6; +   unsigned pad2:1; +   unsigned const_urb_entry_read_length:6; +   unsigned pad3:1; +}; + + + +struct brw_clip_unit_state +{ +   struct thread0 thread0; +   struct +   { +      unsigned pad0:7; +      unsigned sw_exception_enable:1; +      unsigned pad1:3; +      unsigned mask_stack_exception_enable:1; +      unsigned pad2:1; +      unsigned illegal_op_exception_enable:1; +      unsigned pad3:2; +      unsigned floating_point_mode:1; +      unsigned thread_priority:1; +      unsigned binding_table_entry_count:8; +      unsigned pad4:5; +      unsigned single_program_flow:1; +   } thread1; + +   struct thread2 thread2; +   struct thread3 thread3; + +   struct +   { +      unsigned pad0:9; +      unsigned gs_output_stats:1; /* not always */ +      unsigned stats_enable:1; +      unsigned nr_urb_entries:7; +      unsigned pad1:1; +      unsigned urb_entry_allocation_size:5; +      unsigned pad2:1; +      unsigned max_threads:1; 	/* may be less */ +      unsigned pad3:6; +   } thread4; + +   struct +   { +      unsigned pad0:13; +      unsigned clip_mode:3; +      unsigned userclip_enable_flags:8; +      unsigned userclip_must_clip:1; +      unsigned pad1:1; +      unsigned guard_band_enable:1; +      unsigned viewport_z_clip_enable:1; +      unsigned viewport_xy_clip_enable:1; +      unsigned vertex_position_space:1; +      unsigned api_mode:1; +      unsigned pad2:1; +   } clip5; + +   struct +   { +      unsigned pad0:5; +      unsigned clipper_viewport_state_ptr:27; +   } clip6; + + +   float viewport_xmin; +   float viewport_xmax; +   float viewport_ymin; +   float viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ +   struct +   { +      unsigned pad0:3; +      unsigned bf_stencil_pass_depth_pass_op:3; +      unsigned bf_stencil_pass_depth_fail_op:3; +      unsigned bf_stencil_fail_op:3; +      unsigned bf_stencil_func:3; +      unsigned bf_stencil_enable:1; +      unsigned pad1:2; +      unsigned stencil_write_enable:1; +      unsigned stencil_pass_depth_pass_op:3; +      unsigned stencil_pass_depth_fail_op:3; +      unsigned stencil_fail_op:3; +      unsigned stencil_func:3; +      unsigned stencil_enable:1; +   } cc0; + + +   struct 
+   { +      unsigned bf_stencil_ref:8; +      unsigned stencil_write_mask:8; +      unsigned stencil_test_mask:8; +      unsigned stencil_ref:8; +   } cc1; + + +   struct +   { +      unsigned logicop_enable:1; +      unsigned pad0:10; +      unsigned depth_write_enable:1; +      unsigned depth_test_function:3; +      unsigned depth_test:1; +      unsigned bf_stencil_write_mask:8; +      unsigned bf_stencil_test_mask:8; +   } cc2; + + +   struct +   { +      unsigned pad0:8; +      unsigned alpha_test_func:3; +      unsigned alpha_test:1; +      unsigned blend_enable:1; +      unsigned ia_blend_enable:1; +      unsigned pad1:1; +      unsigned alpha_test_format:1; +      unsigned pad2:16; +   } cc3; + +   struct +   { +      unsigned pad0:5; +      unsigned cc_viewport_state_offset:27; +   } cc4; + +   struct +   { +      unsigned pad0:2; +      unsigned ia_dest_blend_factor:5; +      unsigned ia_src_blend_factor:5; +      unsigned ia_blend_function:3; +      unsigned statistics_enable:1; +      unsigned logicop_func:4; +      unsigned pad1:11; +      unsigned dither_enable:1; +   } cc5; + +   struct +   { +      unsigned clamp_post_alpha_blend:1; +      unsigned clamp_pre_alpha_blend:1; +      unsigned clamp_range:2; +      unsigned pad0:11; +      unsigned y_dither_offset:2; +      unsigned x_dither_offset:2; +      unsigned dest_blend_factor:5; +      unsigned src_blend_factor:5; +      unsigned blend_function:3; +   } cc6; + +   struct { +      union { +	 float f; +	 ubyte ub[4]; +      } alpha_ref; +   } cc7; +}; + + + +struct brw_sf_unit_state +{ +   struct thread0 thread0; +   struct thread1 thread1; +   struct thread2 thread2; +   struct thread3 thread3; + +   struct +   { +      unsigned pad0:10; +      unsigned stats_enable:1; +      unsigned nr_urb_entries:7; +      unsigned pad1:1; +      unsigned urb_entry_allocation_size:5; +      unsigned pad2:1; +      unsigned max_threads:6; +      unsigned pad3:1; +   } thread4; + +   struct +   { +      unsigned front_winding:1; +      unsigned viewport_transform:1; +      unsigned pad0:3; +      unsigned sf_viewport_state_offset:27; +   } sf5; + +   struct +   { +      unsigned pad0:9; +      unsigned dest_org_vbias:4; +      unsigned dest_org_hbias:4; +      unsigned scissor:1; +      unsigned disable_2x2_trifilter:1; +      unsigned disable_zero_pix_trifilter:1; +      unsigned point_rast_rule:2; +      unsigned line_endcap_aa_region_width:2; +      unsigned line_width:4; +      unsigned fast_scissor_disable:1; +      unsigned cull_mode:2; +      unsigned aa_enable:1; +   } sf6; + +   struct +   { +      unsigned point_size:11; +      unsigned use_point_size_state:1; +      unsigned subpixel_precision:1; +      unsigned sprite_point:1; +      unsigned pad0:11; +      unsigned trifan_pv:2; +      unsigned linestrip_pv:2; +      unsigned tristrip_pv:2; +      unsigned line_last_pixel_enable:1; +   } sf7; + +}; + + +struct brw_gs_unit_state +{ +   struct thread0 thread0; +   struct thread1 thread1; +   struct thread2 thread2; +   struct thread3 thread3; + +   struct +   { +      unsigned pad0:10; +      unsigned stats_enable:1; +      unsigned nr_urb_entries:7; +      unsigned pad1:1; +      unsigned urb_entry_allocation_size:5; +      unsigned pad2:1; +      unsigned max_threads:1; +      unsigned pad3:6; +   } thread4; + +   struct +   { +      unsigned sampler_count:3; +      unsigned pad0:2; +      unsigned sampler_state_pointer:27; +   } gs5; + + +   struct +   { +      unsigned max_vp_index:4; +      unsigned pad0:26; +      
unsigned reorder_enable:1; +      unsigned pad1:1; +   } gs6; +}; + + +struct brw_vs_unit_state +{ +   struct thread0 thread0; +   struct thread1 thread1; +   struct thread2 thread2; +   struct thread3 thread3; + +   struct +   { +      unsigned pad0:10; +      unsigned stats_enable:1; +      unsigned nr_urb_entries:7; +      unsigned pad1:1; +      unsigned urb_entry_allocation_size:5; +      unsigned pad2:1; +      unsigned max_threads:4; +      unsigned pad3:3; +   } thread4; + +   struct +   { +      unsigned sampler_count:3; +      unsigned pad0:2; +      unsigned sampler_state_pointer:27; +   } vs5; + +   struct +   { +      unsigned vs_enable:1; +      unsigned vert_cache_disable:1; +      unsigned pad0:30; +   } vs6; +}; + + +struct brw_wm_unit_state +{ +   struct thread0 thread0; +   struct thread1 thread1; +   struct thread2 thread2; +   struct thread3 thread3; + +   struct { +      unsigned stats_enable:1; +      unsigned pad0:1; +      unsigned sampler_count:3; +      unsigned sampler_state_pointer:27; +   } wm4; + +   struct +   { +      unsigned enable_8_pix:1; +      unsigned enable_16_pix:1; +      unsigned enable_32_pix:1; +      unsigned pad0:7; +      unsigned legacy_global_depth_bias:1; +      unsigned line_stipple:1; +      unsigned depth_offset:1; +      unsigned polygon_stipple:1; +      unsigned line_aa_region_width:2; +      unsigned line_endcap_aa_region_width:2; +      unsigned early_depth_test:1; +      unsigned thread_dispatch_enable:1; +      unsigned program_uses_depth:1; +      unsigned program_computes_depth:1; +      unsigned program_uses_killpixel:1; +      unsigned legacy_line_rast: 1; +      unsigned pad1:1; +      unsigned max_threads:6; +      unsigned pad2:1; +   } wm5; + +   float global_depth_offset_constant; +   float global_depth_offset_scale; +}; + +struct brw_sampler_default_color { +   float color[4]; +}; + +struct brw_sampler_state +{ + +   struct +   { +      unsigned shadow_function:3; +      unsigned lod_bias:11; +      unsigned min_filter:3; +      unsigned mag_filter:3; +      unsigned mip_filter:2; +      unsigned base_level:5; +      unsigned pad:1; +      unsigned lod_preclamp:1; +      unsigned default_color_mode:1; +      unsigned pad0:1; +      unsigned disable:1; +   } ss0; + +   struct +   { +      unsigned r_wrap_mode:3; +      unsigned t_wrap_mode:3; +      unsigned s_wrap_mode:3; +      unsigned pad:3; +      unsigned max_lod:10; +      unsigned min_lod:10; +   } ss1; + + +   struct +   { +      unsigned pad:5; +      unsigned default_color_pointer:27; +   } ss2; + +   struct +   { +      unsigned pad:19; +      unsigned max_aniso:3; +      unsigned chroma_key_mode:1; +      unsigned chroma_key_index:2; +      unsigned chroma_key_enable:1; +      unsigned monochrome_filter_width:3; +      unsigned monochrome_filter_height:3; +   } ss3; +}; + + +struct brw_clipper_viewport +{ +   float xmin; +   float xmax; +   float ymin; +   float ymax; +}; + +struct brw_cc_viewport +{ +   float min_depth; +   float max_depth; +}; + +struct brw_sf_viewport +{ +   struct { +      float m00; +      float m11; +      float m22; +      float m30; +      float m31; +      float m32; +   } viewport; + +   struct { +      short xmin; +      short ymin; +      short xmax; +      short ymax; +   } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... 
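+ *
+ * The five dwords below (ss0..ss4) describe a single surface: ss0 holds
+ * the surface type, format, cube-face enables and per-channel write
+ * disables; ss1 the base address; ss2 the width, height and mip count;
+ * ss3 the pitch, tiling and depth; ss4 the minimum LOD and array
+ * element.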
+ */ +struct brw_surface_state +{ +   struct { +      unsigned cube_pos_z:1; +      unsigned cube_neg_z:1; +      unsigned cube_pos_y:1; +      unsigned cube_neg_y:1; +      unsigned cube_pos_x:1; +      unsigned cube_neg_x:1; +      unsigned pad:4; +      unsigned mipmap_layout_mode:1; +      unsigned vert_line_stride_ofs:1; +      unsigned vert_line_stride:1; +      unsigned color_blend:1; +      unsigned writedisable_blue:1; +      unsigned writedisable_green:1; +      unsigned writedisable_red:1; +      unsigned writedisable_alpha:1; +      unsigned surface_format:9; +      unsigned data_return_format:1; +      unsigned pad0:1; +      unsigned surface_type:3; +   } ss0; + +   struct { +      unsigned base_addr; +   } ss1; + +   struct { +      unsigned pad:2; +      unsigned mip_count:4; +      unsigned width:13; +      unsigned height:13; +   } ss2; + +   struct { +      unsigned tile_walk:1; +      unsigned tiled_surface:1; +      unsigned pad:1; +      unsigned pitch:18; +      unsigned depth:11; +   } ss3; + +   struct { +      unsigned pad:19; +      unsigned min_array_elt:9; +      unsigned min_lod:4; +   } ss4; +}; + + + +struct brw_vertex_buffer_state +{ +   struct { +      unsigned pitch:11; +      unsigned pad:15; +      unsigned access_type:1; +      unsigned vb_index:5; +   } vb0; + +   unsigned start_addr; +   unsigned max_index; +#if 1 +   unsigned instance_data_step_rate; /* not included for sequential/random vertices? */ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { +   struct header header; +   struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ +   struct +   { +      unsigned src_offset:11; +      unsigned pad:5; +      unsigned src_format:9; +      unsigned pad0:1; +      unsigned valid:1; +      unsigned vertex_buffer_index:5; +   } ve0; + +   struct +   { +      unsigned dst_offset:8; +      unsigned pad:8; +      unsigned vfcomponent3:4; +      unsigned vfcomponent2:4; +      unsigned vfcomponent1:4; +      unsigned vfcomponent0:4; +   } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { +   struct header header; +   struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { +   unsigned opcode:4; +   unsigned offset:6; +   unsigned swizzle_control:2; +   unsigned pad:1; +   unsigned allocate:1; +   unsigned used:1; +   unsigned complete:1; +   unsigned response_length:4; +   unsigned msg_length:4; +   unsigned msg_target:4; +   unsigned pad1:3; +   unsigned end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ +   struct +   { +      unsigned opcode:7; +      unsigned pad:1; +      unsigned access_mode:1; +      unsigned mask_control:1; +      unsigned dependency_control:2; +      unsigned compression_control:2; +      unsigned thread_control:2; +      unsigned predicate_control:4; +      unsigned predicate_inverse:1; +      unsigned execution_size:3; +      unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ +      unsigned pad0:2; +      unsigned debug_control:1; +      unsigned saturate:1; +   } header; + +   union { +      struct +      { +	 unsigned dest_reg_file:2; +	 unsigned dest_reg_type:3; +	 unsigned src0_reg_file:2; +	 unsigned src0_reg_type:3; +	 unsigned src1_reg_file:2; +	 unsigned src1_reg_type:3; +	 unsigned pad:1; +	 unsigned dest_subreg_nr:5; +	 unsigned dest_reg_nr:8; +	 unsigned dest_horiz_stride:2; +	 unsigned 
dest_address_mode:1; +      } da1; + +      struct +      { +	 unsigned dest_reg_file:2; +	 unsigned dest_reg_type:3; +	 unsigned src0_reg_file:2; +	 unsigned src0_reg_type:3; +	 unsigned pad:6; +	 int dest_indirect_offset:10;	/* offset against the deref'd address reg */ +	 unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ +	 unsigned dest_horiz_stride:2; +	 unsigned dest_address_mode:1; +      } ia1; + +      struct +      { +	 unsigned dest_reg_file:2; +	 unsigned dest_reg_type:3; +	 unsigned src0_reg_file:2; +	 unsigned src0_reg_type:3; +	 unsigned src1_reg_file:2; +	 unsigned src1_reg_type:3; +	 unsigned pad0:1; +	 unsigned dest_writemask:4; +	 unsigned dest_subreg_nr:1; +	 unsigned dest_reg_nr:8; +	 unsigned pad1:2; +	 unsigned dest_address_mode:1; +      } da16; + +      struct +      { +	 unsigned dest_reg_file:2; +	 unsigned dest_reg_type:3; +	 unsigned src0_reg_file:2; +	 unsigned src0_reg_type:3; +	 unsigned pad0:6; +	 unsigned dest_writemask:4; +	 int dest_indirect_offset:6; +	 unsigned dest_subreg_nr:3; +	 unsigned pad1:2; +	 unsigned dest_address_mode:1; +      } ia16; +   } bits1; + + +   union { +      struct +      { +	 unsigned src0_subreg_nr:5; +	 unsigned src0_reg_nr:8; +	 unsigned src0_abs:1; +	 unsigned src0_negate:1; +	 unsigned src0_address_mode:1; +	 unsigned src0_horiz_stride:2; +	 unsigned src0_width:3; +	 unsigned src0_vert_stride:4; +	 unsigned flag_reg_nr:1; +	 unsigned pad:6; +      } da1; + +      struct +      { +	 int src0_indirect_offset:10; +	 unsigned src0_subreg_nr:3; +	 unsigned src0_abs:1; +	 unsigned src0_negate:1; +	 unsigned src0_address_mode:1; +	 unsigned src0_horiz_stride:2; +	 unsigned src0_width:3; +	 unsigned src0_vert_stride:4; +	 unsigned flag_reg_nr:1; +	 unsigned pad:6; +      } ia1; + +      struct +      { +	 unsigned src0_swz_x:2; +	 unsigned src0_swz_y:2; +	 unsigned src0_subreg_nr:1; +	 unsigned src0_reg_nr:8; +	 unsigned src0_abs:1; +	 unsigned src0_negate:1; +	 unsigned src0_address_mode:1; +	 unsigned src0_swz_z:2; +	 unsigned src0_swz_w:2; +	 unsigned pad0:1; +	 unsigned src0_vert_stride:4; +	 unsigned flag_reg_nr:1; +	 unsigned pad1:6; +      } da16; + +      struct +      { +	 unsigned src0_swz_x:2; +	 unsigned src0_swz_y:2; +	 int src0_indirect_offset:6; +	 unsigned src0_subreg_nr:3; +	 unsigned src0_abs:1; +	 unsigned src0_negate:1; +	 unsigned src0_address_mode:1; +	 unsigned src0_swz_z:2; +	 unsigned src0_swz_w:2; +	 unsigned pad0:1; +	 unsigned src0_vert_stride:4; +	 unsigned flag_reg_nr:1; +	 unsigned pad1:6; +      } ia16; + +   } bits2; + +   union +   { +      struct +      { +	 unsigned src1_subreg_nr:5; +	 unsigned src1_reg_nr:8; +	 unsigned src1_abs:1; +	 unsigned src1_negate:1; +	 unsigned pad:1; +	 unsigned src1_horiz_stride:2; +	 unsigned src1_width:3; +	 unsigned src1_vert_stride:4; +	 unsigned pad0:7; +      } da1; + +      struct +      { +	 unsigned src1_swz_x:2; +	 unsigned src1_swz_y:2; +	 unsigned src1_subreg_nr:1; +	 unsigned src1_reg_nr:8; +	 unsigned src1_abs:1; +	 unsigned src1_negate:1; +	 unsigned pad0:1; +	 unsigned src1_swz_z:2; +	 unsigned src1_swz_w:2; +	 unsigned pad1:1; +	 unsigned src1_vert_stride:4; +	 unsigned pad2:7; +      } da16; + +      struct +      { +	 int  src1_indirect_offset:10; +	 unsigned src1_subreg_nr:3; +	 unsigned src1_abs:1; +	 unsigned src1_negate:1; +	 unsigned pad0:1; +	 unsigned src1_horiz_stride:2; +	 unsigned src1_width:3; +	 unsigned src1_vert_stride:4; +	 unsigned flag_reg_nr:1; +	 unsigned pad1:6; +      } ia1; + +      struct +      { +	 unsigned 
src1_swz_x:2; +	 unsigned src1_swz_y:2; +	 int  src1_indirect_offset:6; +	 unsigned src1_subreg_nr:3; +	 unsigned src1_abs:1; +	 unsigned src1_negate:1; +	 unsigned pad0:1; +	 unsigned src1_swz_z:2; +	 unsigned src1_swz_w:2; +	 unsigned pad1:1; +	 unsigned src1_vert_stride:4; +	 unsigned flag_reg_nr:1; +	 unsigned pad2:6; +      } ia16; + + +      struct +      { +	 int  jump_count:16;	/* note: signed */ +	 unsigned  pop_count:4; +	 unsigned  pad0:12; +      } if_else; + +      struct { +	 unsigned function:4; +	 unsigned int_type:1; +	 unsigned precision:1; +	 unsigned saturate:1; +	 unsigned data_type:1; +	 unsigned pad0:8; +	 unsigned response_length:4; +	 unsigned msg_length:4; +	 unsigned msg_target:4; +	 unsigned pad1:3; +	 unsigned end_of_thread:1; +      } math; + +      struct { +	 unsigned binding_table_index:8; +	 unsigned sampler:4; +	 unsigned return_format:2; +	 unsigned msg_type:2; +	 unsigned response_length:4; +	 unsigned msg_length:4; +	 unsigned msg_target:4; +	 unsigned pad1:3; +	 unsigned end_of_thread:1; +      } sampler; + +      struct brw_urb_immediate urb; + +      struct { +	 unsigned binding_table_index:8; +	 unsigned msg_control:4; +	 unsigned msg_type:2; +	 unsigned target_cache:2; +	 unsigned response_length:4; +	 unsigned msg_length:4; +	 unsigned msg_target:4; +	 unsigned pad1:3; +	 unsigned end_of_thread:1; +      } dp_read; + +      struct { +	 unsigned binding_table_index:8; +	 unsigned msg_control:3; +	 unsigned pixel_scoreboard_clear:1; +	 unsigned msg_type:3; +	 unsigned send_commit_msg:1; +	 unsigned response_length:4; +	 unsigned msg_length:4; +	 unsigned msg_target:4; +	 unsigned pad1:3; +	 unsigned end_of_thread:1; +      } dp_write; + +      struct { +	 unsigned pad:16; +	 unsigned response_length:4; +	 unsigned msg_length:4; +	 unsigned msg_target:4; +	 unsigned pad1:3; +	 unsigned end_of_thread:1; +      } generic; + +      int d; +      unsigned ud; +   } bits3; +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c new file mode 100644 index 0000000000..0a95dce194 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -0,0 +1,127 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_tile.h" +#include "util/u_rect.h" + + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +brw_surface_copy(struct pipe_context *pipe, +                 boolean do_flip, +                 struct pipe_surface *dst, +                 unsigned dstx, unsigned dsty, +                 struct pipe_surface *src, +                 unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ +   assert( dst != src ); +   assert( dst->block.size == src->block.size ); +   assert( dst->block.width == src->block.height ); +   assert( dst->block.height == src->block.height ); + +   if (0) { +      void *dst_map = pipe->screen->surface_map( pipe->screen, +                                                 dst, +                                                 PIPE_BUFFER_USAGE_CPU_WRITE ); +       +      const void *src_map = pipe->screen->surface_map( pipe->screen, +                                                       src, +                                                       PIPE_BUFFER_USAGE_CPU_READ ); +       +      pipe_copy_rect(dst_map, +                     &dst->block, +                     dst->stride, +                     dstx, dsty,  +                     width, height,  +                     src_map,  +                     do_flip ? -(int) src->stride : src->stride,  +                     srcx, do_flip ? height - 1 - srcy : srcy); + +      pipe->screen->surface_unmap(pipe->screen, src); +      pipe->screen->surface_unmap(pipe->screen, dst); +   } +   else { +      struct brw_texture *dst_tex = (struct brw_texture *)dst->texture; +      struct brw_texture *src_tex = (struct brw_texture *)src->texture; +      assert(dst->block.width == 1); +      assert(dst->block.height == 1); +      brw_copy_blit(brw_context(pipe), +                    do_flip, +                    dst->block.size, +                    (short) src->stride/src->block.size, src_tex->buffer, src->offset, FALSE, +                    (short) dst->stride/dst->block.size, dst_tex->buffer, dst->offset, FALSE, +                    (short) srcx, (short) srcy, (short) dstx, (short) dsty, +                    (short) width, (short) height, PIPE_LOGICOP_COPY); +   } +} + + +static void +brw_surface_fill(struct pipe_context *pipe, +                 struct pipe_surface *dst, +                 unsigned dstx, unsigned dsty, +                 unsigned width, unsigned height, unsigned value) +{ +   if (0) { +      void *dst_map = pipe->screen->surface_map( pipe->screen, +                                                 dst, +                                                 PIPE_BUFFER_USAGE_CPU_WRITE ); + +      pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); + +      pipe->screen->surface_unmap(pipe->screen, dst); +   } +   else { +      struct brw_texture *tex = (struct brw_texture *)dst->texture; +      assert(dst->block.width == 1); +      assert(dst->block.height == 1); +      brw_fill_blit(brw_context(pipe), +                    dst->block.size, +                    (short) dst->stride/dst->block.size,  +                    tex->buffer, dst->offset, FALSE, +                    (short) dstx, (short) dsty, +                    (short) width, (short) 
height, +                    value); +   } +} + + +void +brw_init_surface_functions(struct brw_context *brw) +{ +   brw->pipe.surface_copy  = brw_surface_copy; +   brw->pipe.surface_fill  = brw_surface_fill; +} diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c new file mode 100644 index 0000000000..448229ed4e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -0,0 +1,398 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +/* Code to layout images in a mipmap tree for i965. 
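+ *
+ * 2D (and 1D) targets use the i945-style layout below: level 0 at the
+ * top left, level 1 directly beneath it, and levels 2 and smaller in a
+ * column to the right of level 1.  Cube and 3D targets instead pack
+ * all images of a level (faces or depth slices) in rows before
+ * stepping down to the next level.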
+ */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "brw_context.h" +#include "brw_tex_layout.h" + + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +#if 0 +unsigned intel_compressed_alignment(unsigned internalFormat) +{ +    unsigned alignment = 4; + +    switch (internalFormat) { +    case GL_COMPRESSED_RGB_FXT1_3DFX: +    case GL_COMPRESSED_RGBA_FXT1_3DFX: +        alignment = 8; +        break; + +    default: +        break; +    } + +    return alignment; +} +#endif + +static unsigned minify( unsigned d ) +{ +   return MAX2(1, d>>1); +} + + +static void intel_miptree_set_image_offset(struct brw_texture *tex, +                                           unsigned level, +                                           unsigned img, +                                           unsigned x, unsigned y) +{ +   struct pipe_texture *pt = &tex->base; +   if (img == 0 && level == 0) +      assert(x == 0 && y == 0); +   assert(img < tex->nr_images[level]); + +   tex->image_offset[level][img] = y * tex->stride + x * pt->block.size; +} + +static void intel_miptree_set_level_info(struct brw_texture *tex, +                                         unsigned level, +                                         unsigned nr_images, +                                         unsigned x, unsigned y, +                                         unsigned w, unsigned h, unsigned d) +{ +   struct pipe_texture *pt = &tex->base; + +   assert(level < PIPE_MAX_TEXTURE_LEVELS); + +   pt->width[level] = w; +   pt->height[level] = h; +   pt->depth[level] = d; +    +   pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); +   pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); + +   tex->level_offset[level] = y * tex->stride + x * tex->base.block.size; +   tex->nr_images[level] = nr_images; + +   /* +   DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, +       level, w, h, d, x, y, tex->level_offset[level]); +   */ + +   /* Not sure when this would happen, but anyway:  +    */ +   if (tex->image_offset[level]) { +      FREE(tex->image_offset[level]); +      tex->image_offset[level] = NULL; +   } + +   assert(nr_images); +   assert(!tex->image_offset[level]); + +   tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); +   tex->image_offset[level][0] = 0; +} + +static void i945_miptree_layout_2d(struct brw_texture *tex) +{ +   struct pipe_texture *pt = &tex->base; +   const int align_x = 2, align_y = 4; +   unsigned level; +   unsigned x = 0; +   unsigned y = 0; +   unsigned width = pt->width[0]; +   unsigned height = pt->height[0]; +   unsigned nblocksx = pt->nblocksx[0]; +   unsigned nblocksy = pt->nblocksy[0]; + +   tex->stride = align(pt->nblocksx[0] * pt->block.size, 4); + +   /* May need to adjust pitch to accomodate the placement of +    * the 2nd mipmap level.  This occurs when the alignment +    * constraints of mipmap placement push the right edge of the +    * 2nd mipmap level out past the width of its parent. 
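+    * As a hypothetical example, for a 2-texel wide, uncompressed,
+    * mipmapped 2D texture: level 0 needs only 2 blocks of pitch, but
+    * align(1, align_x) + 1 = 3 blocks are needed to place levels 1
+    * and 2 side by side, so the stride is widened accordingly.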
+    */ +   if (pt->last_level > 0) { +      unsigned mip1_nblocksx  +	 = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) +         + pf_get_nblocksx(&pt->block, minify(minify(width))); + +      if (mip1_nblocksx > nblocksx) +	 tex->stride = mip1_nblocksx * pt->block.size; +   } + +   /* Pitch must be a whole number of dwords +    */ +   tex->stride = align(tex->stride, 64); +   tex->total_nblocksy = 0; + +   for (level = 0; level <= pt->last_level; level++) { +      intel_miptree_set_level_info(tex, level, 1, x, y, width, +				   height, 1); + +      nblocksy = align(nblocksy, align_y); + +      /* Because the images are packed better, the final offset +       * might not be the maximal one: +       */ +      tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); + +      /* Layout_below: step right after second mipmap level. +       */ +      if (level == 1) { +	 x += align(nblocksx, align_x); +      } +      else { +	 y += nblocksy; +      } + +      width  = minify(width); +      height = minify(height); +      nblocksx = pf_get_nblocksx(&pt->block, width); +      nblocksy = pf_get_nblocksy(&pt->block, height); +   } +} + +static boolean brw_miptree_layout(struct brw_texture *tex) +{ +   struct pipe_texture *pt = &tex->base; +   /* XXX: these vary depending on image format: +    */ +/*    int align_w = 4; */ + +   switch (pt->target) { +   case PIPE_TEXTURE_CUBE: +   case PIPE_TEXTURE_3D: { +      unsigned width  = pt->width[0]; +      unsigned height = pt->height[0]; +      unsigned depth = pt->depth[0]; +      unsigned nblocksx = pt->nblocksx[0]; +      unsigned nblocksy = pt->nblocksy[0]; +      unsigned pack_x_pitch, pack_x_nr; +      unsigned pack_y_pitch; +      unsigned level; +      unsigned align_h = 2; +      unsigned align_w = 4; + +      tex->total_nblocksy = 0; + +      tex->stride = align(pt->nblocksx[0], 4); +      pack_y_pitch = align(pt->nblocksy[0], align_h); + +      pack_x_pitch = tex->stride / pt->block.size; +      pack_x_nr = 1; + +      for (level = 0; level <= pt->last_level; level++) { +	 unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? 
depth : 6; +	 int x = 0; +	 int y = 0; +	 uint q, j; + +	 intel_miptree_set_level_info(tex, level, nr_images, +				      0, tex->total_nblocksy, +				      width, height, depth); + +	 for (q = 0; q < nr_images;) { +	    for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { +	       intel_miptree_set_image_offset(tex, level, q, x, y); +	       x += pack_x_pitch; +	    } + +	    x = 0; +	    y += pack_y_pitch; +	 } + + +	 tex->total_nblocksy += y; +	 width  = minify(width); +	 height = minify(height); +	 depth  = minify(depth); +         nblocksx = pf_get_nblocksx(&pt->block, width); +         nblocksy = pf_get_nblocksy(&pt->block, height); + +         if (pt->compressed) { +            pack_y_pitch = (height + 3) / 4; + +            if (pack_x_pitch > align(width, align_w)) { +               pack_x_pitch = align(width, align_w); +               pack_x_nr <<= 1; +            } +         } else { +            if (pack_x_pitch > 4) { +               pack_x_pitch >>= 1; +               pack_x_nr <<= 1; +               assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); +            } + +            if (pack_y_pitch > 2) { +               pack_y_pitch >>= 1; +               pack_y_pitch = align(pack_y_pitch, align_h); +            } +         } + +      } +      break; +   } + +   default: +      i945_miptree_layout_2d(tex); +      break; +   } +#if 0 +   PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, +       pt->pitch, +       pt->total_nblocksy, +       pt->block.size, +       pt->stride * pt->total_nblocksy ); +#endif + +   return TRUE; +} + + +static struct pipe_texture * +brw_texture_create_screen(struct pipe_screen *screen, +                          const struct pipe_texture *templat) +{ +   struct pipe_winsys *ws = screen->winsys; +   struct brw_texture *tex = CALLOC_STRUCT(brw_texture); + +   if (tex) { +      tex->base = *templat; +      tex->base.refcount = 1; + +      tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); +      tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); +    +      if (brw_miptree_layout(tex)) +	 tex->buffer = ws->buffer_create(ws, 64, +                                          PIPE_BUFFER_USAGE_PIXEL, +                                          tex->stride * +                                          tex->total_nblocksy); + +      if (!tex->buffer) { +	 FREE(tex); +         return NULL; +      } +   } + +   return &tex->base; +} + + +static void +brw_texture_release_screen(struct pipe_screen *screen, +                           struct pipe_texture **pt) +{ +   if (!*pt) +      return; + +   /* +   DBG("%s %p refcount will be %d\n", +       __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); +   */ +   if (--(*pt)->refcount <= 0) { +      struct brw_texture *tex = (struct brw_texture *)*pt; +      uint i; + +      /* +      DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); +      */ + +      pipe_buffer_reference(screen, &tex->buffer, NULL); + +      for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) +         if (tex->image_offset[i]) +            free(tex->image_offset[i]); + +      free(tex); +   } +   *pt = NULL; +} + + +static struct pipe_surface * +brw_get_tex_surface_screen(struct pipe_screen *screen, +                           struct pipe_texture *pt, +                           unsigned face, unsigned level, unsigned zslice) +{ +   struct brw_texture *tex = (struct brw_texture *)pt; +   struct pipe_surface *ps; +   unsigned offset;  /* in bytes */ + +   offset = 
tex->level_offset[level]; + +   if (pt->target == PIPE_TEXTURE_CUBE) { +      offset += tex->image_offset[level][face]; +   } +   else if (pt->target == PIPE_TEXTURE_3D) { +      offset += tex->image_offset[level][zslice]; +   } +   else { +      assert(face == 0); +      assert(zslice == 0); +   } + +   ps = CALLOC_STRUCT(pipe_surface); +   if (ps) { +      ps->refcount = 1; +      pipe_texture_reference(&ps->texture, pt); +      ps->format = pt->format; +      ps->width = pt->width[level]; +      ps->height = pt->height[level]; +      ps->block = pt->block; +      ps->nblocksx = pt->nblocksx[level]; +      ps->nblocksy = pt->nblocksy[level]; +      ps->stride = tex->stride; +      ps->offset = offset; +      ps->status = PIPE_SURFACE_STATUS_DEFINED; +   } +   return ps; +} + + +void +brw_init_texture_functions(struct brw_context *brw) +{ +//   brw->pipe.texture_update = brw_texture_update; +} + + +void +brw_init_screen_texture_funcs(struct pipe_screen *screen) +{ +   screen->texture_create  = brw_texture_create_screen; +   screen->texture_release = brw_texture_release_screen; +   screen->get_tex_surface = brw_get_tex_surface_screen; +} + diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h new file mode 100644 index 0000000000..a6b6ba8146 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -0,0 +1,44 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + + +#ifndef BRW_TEX_LAYOUT_H +#define BRW_TEX_LAYOUT_H + + +struct brw_context; +struct pipe_screen; + + +extern void +brw_init_texture_functions(struct brw_context *brw); + +extern void +brw_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c new file mode 100644 index 0000000000..101a4367b9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_urb.c @@ -0,0 +1,186 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +//#include "brw_state.h" +#include "brw_batch.h" +#include "brw_defines.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/* XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. + */ +static const struct { +   unsigned min_nr_entries; +   unsigned preferred_nr_entries; +   unsigned min_entry_size; +   unsigned max_entry_size; +} limits[CS+1] = { +   { 8, 32, 1, 5 },			/* vs */ +   { 4, 8,  1, 5 },			/* gs */ +   { 6, 8,  1, 5 },			/* clp */ +   { 1, 8,  1, 12 },		        /* sf */ +   { 1, 4,  1, 32 }			/* cs */ +}; + + +static boolean check_urb_layout( struct brw_context *brw ) +{ +   brw->urb.vs_start = 0; +   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; +   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; +   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; +   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; + +   return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; +} + +/* Most minimal update, forces re-emit of URB fence packet after GS + * unit turned on/off. 
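+ *
+ * With the worst-case entry sizes from the limits[] table above
+ * (vsize = 5, sfsize = 12, csize = 32), the preferred entry counts
+ * would need 32*5 + 8*5 + 8*5 + 8*12 + 4*32 = 464 of the 256
+ * available URB register pairs, so check_urb_layout() fails and the
+ * code falls back to the minimum counts, which need at most
+ * 8*5 + 4*5 + 6*5 + 1*12 + 1*32 = 134 and therefore always fit.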
+ */ +static void recalculate_urb_fence( struct brw_context *brw ) +{ +   unsigned csize = brw->curbe.total_size; +   unsigned vsize = brw->vs.prog_data->urb_entry_size; +   unsigned sfsize = brw->sf.prog_data->urb_entry_size; + +   if (csize < limits[CS].min_entry_size) +      csize = limits[CS].min_entry_size; + +   if (vsize < limits[VS].min_entry_size) +      vsize = limits[VS].min_entry_size; + +   if (sfsize < limits[SF].min_entry_size) +      sfsize = limits[SF].min_entry_size; + +   if (brw->urb.vsize < vsize || +       brw->urb.sfsize < sfsize || +       brw->urb.csize < csize || +       (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || +				 brw->urb.sfsize > brw->urb.sfsize || +				 brw->urb.csize > brw->urb.csize))) { + + +      brw->urb.csize = csize; +      brw->urb.sfsize = sfsize; +      brw->urb.vsize = vsize; + +      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; +      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; +      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; +      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; +      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; + +      if (!check_urb_layout(brw)) { +	 brw->urb.nr_vs_entries = limits[VS].min_nr_entries; +	 brw->urb.nr_gs_entries = limits[GS].min_nr_entries; +	 brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; +	 brw->urb.nr_sf_entries = limits[SF].min_nr_entries; +	 brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + +	 brw->urb.constrained = 1; + +	 if (!check_urb_layout(brw)) { +	    /* This is impossible, given the maximal sizes of urb +	     * entries and the values for minimum nr of entries +	     * provided above. +	     */ +	    debug_printf("couldn't calculate URB layout!\n"); +	    exit(1); +	 } + +	 if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) +	    debug_printf("URB CONSTRAINED\n"); +      } +      else +	 brw->urb.constrained = 0; + +      if (BRW_DEBUG & DEBUG_URB) +	 debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", +		      brw->urb.vs_start, +		      brw->urb.gs_start, +		      brw->urb.clip_start, +		      brw->urb.sf_start, +		      brw->urb.cs_start, +		      256); + +      brw->state.dirty.brw |= BRW_NEW_URB_FENCE; +   } +} + + +const struct brw_tracked_state brw_recalculate_urb_fence = { +   .dirty = { +      .brw = BRW_NEW_CURBE_OFFSETS, +      .cache = (CACHE_NEW_VS_PROG | +		CACHE_NEW_SF_PROG) +   }, +   .update = recalculate_urb_fence +}; + + + + + +void brw_upload_urb_fence(struct brw_context *brw) +{ +   struct brw_urb_fence uf; +   memset(&uf, 0, sizeof(uf)); + +   uf.header.opcode = CMD_URB_FENCE; +   uf.header.length = sizeof(uf)/4-2; +   uf.header.vs_realloc = 1; +   uf.header.gs_realloc = 1; +   uf.header.clp_realloc = 1; +   uf.header.sf_realloc = 1; +   uf.header.vfe_realloc = 1; +   uf.header.cs_realloc = 1; + +   /* The ordering below is correct, not the layout in the +    * instruction. +    * +    * There are 256 urb reg pairs in total. +    */ +   uf.bits0.vs_fence  = brw->urb.gs_start; +   uf.bits0.gs_fence  = brw->urb.clip_start; +   uf.bits0.clp_fence = brw->urb.sf_start; +   uf.bits1.sf_fence  = brw->urb.cs_start; +   uf.bits1.cs_fence  = 256; + +   BRW_BATCH_STRUCT(brw, &uf); +} diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c new file mode 100644 index 0000000000..42391d7c8c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.c @@ -0,0 +1,104 @@ +/* + Copyright (C) Intel Corp.  2006.  
All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_util.h" +#include "brw_defines.h" + +#include "pipe/p_defines.h" + +unsigned brw_count_bits( unsigned val ) +{ +   unsigned i; +   for (i = 0; val ; val >>= 1) +      if (val & 1) +	 i++; +   return i; +} + + +unsigned brw_translate_blend_equation( int mode ) +{ +   switch (mode) { +   case PIPE_BLEND_ADD: +      return BRW_BLENDFUNCTION_ADD; +   case PIPE_BLEND_MIN: +      return BRW_BLENDFUNCTION_MIN; +   case PIPE_BLEND_MAX: +      return BRW_BLENDFUNCTION_MAX; +   case PIPE_BLEND_SUBTRACT: +      return BRW_BLENDFUNCTION_SUBTRACT; +   case PIPE_BLEND_REVERSE_SUBTRACT: +      return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; +   default: +      assert(0); +      return BRW_BLENDFUNCTION_ADD; +   } +} + +unsigned brw_translate_blend_factor( int factor ) +{ +   switch(factor) { +   case PIPE_BLENDFACTOR_ZERO: +      return BRW_BLENDFACTOR_ZERO; +   case PIPE_BLENDFACTOR_SRC_ALPHA: +      return BRW_BLENDFACTOR_SRC_ALPHA; +   case PIPE_BLENDFACTOR_ONE: +      return BRW_BLENDFACTOR_ONE; +   case PIPE_BLENDFACTOR_SRC_COLOR: +      return BRW_BLENDFACTOR_SRC_COLOR; +   case PIPE_BLENDFACTOR_INV_SRC_COLOR: +      return BRW_BLENDFACTOR_INV_SRC_COLOR; +   case PIPE_BLENDFACTOR_DST_COLOR: +      return BRW_BLENDFACTOR_DST_COLOR; +   case PIPE_BLENDFACTOR_INV_DST_COLOR: +      return BRW_BLENDFACTOR_INV_DST_COLOR; +   case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +      return BRW_BLENDFACTOR_INV_SRC_ALPHA; +   case PIPE_BLENDFACTOR_DST_ALPHA: +      return BRW_BLENDFACTOR_DST_ALPHA; +   case PIPE_BLENDFACTOR_INV_DST_ALPHA: +      return BRW_BLENDFACTOR_INV_DST_ALPHA; +   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +      return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; +   case PIPE_BLENDFACTOR_CONST_COLOR: +      return BRW_BLENDFACTOR_CONST_COLOR; +   case PIPE_BLENDFACTOR_INV_CONST_COLOR: +      return BRW_BLENDFACTOR_INV_CONST_COLOR; +   case PIPE_BLENDFACTOR_CONST_ALPHA: +      return BRW_BLENDFACTOR_CONST_ALPHA; +   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +      return BRW_BLENDFACTOR_INV_CONST_ALPHA; +   default: +      assert(0); +      return BRW_BLENDFACTOR_ZERO; +   } +} diff --git a/src/gallium/drivers/i965simple/brw_util.h 
b/src/gallium/drivers/i965simple/brw_util.h new file mode 100644 index 0000000000..d60e5934db --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.h @@ -0,0 +1,43 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +           + +#ifndef BRW_UTIL_H +#define BRW_UTIL_H + +#include "pipe/p_state.h" + +extern unsigned brw_count_bits( unsigned val ); +extern unsigned brw_translate_blend_factor( int factor ); +extern unsigned brw_translate_blend_equation( int mode ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c new file mode 100644 index 0000000000..92327e896d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.c @@ -0,0 +1,120 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" + + +static void do_vs_prog( struct brw_context *brw, +			const struct brw_vertex_program *vp, +			struct brw_vs_prog_key *key ) +{ +   unsigned program_size; +   const unsigned *program; +   struct brw_vs_compile c; + +   memset(&c, 0, sizeof(c)); +   memcpy(&c.key, key, sizeof(*key)); + +   brw_init_compile(&c.func); +   c.vp = vp; + +   c.prog_data.outputs_written = vp->info.num_outputs; +   c.prog_data.inputs_read = vp->info.num_inputs; + +#if 0 +   if (c.key.copy_edgeflag) { +      c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; +      c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; +   } +#endif + +   /* Emit GEN4 code. +    */ +   brw_vs_emit(&c); + +   /* get the program +    */ +   program = brw_get_program(&c.func, &program_size); + +   /* +    */ +   brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], +					      &c.key, +					      sizeof(c.key), +					      program, +					      program_size, +					      &c.prog_data, +					      &brw->vs.prog_data); +} + + +static void brw_upload_vs_prog( struct brw_context *brw ) +{ +   struct brw_vs_prog_key key; +   const struct brw_vertex_program *vp = brw->attribs.VertexProgram; + +   assert(vp); + +   memset(&key, 0, sizeof(key)); + +   /* Just upload the program verbatim for now.  Always send it all +    * the inputs it asks for, whether they are varying or not. +    */ +   key.program_string_id = vp->id; +   key.nr_userclip = brw->attribs.Clip.nr; +   key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || +			brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); + +   /* Make an early check for the key. +    */ +   if (brw_search_cache(&brw->cache[BRW_VS_PROG], +			&key, sizeof(key), +			&brw->vs.prog_data, +			&brw->vs.prog_gs_offset)) +       return; + +   do_vs_prog(brw, vp, &key); +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { +   .dirty = { +      .brw   = BRW_NEW_VS, +      .cache = 0 +   }, +   .update = brw_upload_vs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h new file mode 100644 index 0000000000..070f9dfcae --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.h @@ -0,0 +1,82 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#ifndef BRW_VS_H +#define BRW_VS_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +struct brw_vs_prog_key { +   unsigned program_string_id; +   unsigned nr_userclip:4; +   unsigned copy_edgeflag:1; +   unsigned know_w_is_one:1; +   unsigned pad:26; +}; + + +struct brw_vs_compile { +   struct brw_compile func; +   struct brw_vs_prog_key key; +   struct brw_vs_prog_data prog_data; + +   const struct brw_vertex_program *vp; + +   unsigned nr_inputs; + +   unsigned first_output; +   unsigned nr_outputs; + +   unsigned first_tmp; +   unsigned last_tmp; + +   struct brw_reg r0; +   struct brw_reg r1; +   struct brw_reg regs[12][128]; +   struct brw_reg tmp; +   struct brw_reg stack; + +   struct { +       boolean used_in_src; +       struct brw_reg reg; +   } output_regs[128]; + +   struct brw_reg userplane[6]; + +}; + +void brw_vs_emit( struct brw_vs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c new file mode 100644 index 0000000000..e03d653482 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -0,0 +1,1330 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_context.h" +#include "brw_vs.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +struct brw_prog_info { +   unsigned num_temps; +   unsigned num_addrs; +   unsigned num_consts; + +   unsigned writes_psize; + +   unsigned pos_idx; +   unsigned result_edge_idx; +   unsigned edge_flag_idx; +   unsigned psize_idx; +}; + +/* Do things as simply as possible.  Allocate and populate all regs + * ahead of time. 
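+ *
+ * GRFs are handed out in a fixed order: r0, the curbe (user clip
+ * planes followed by the constants), one register per input
+ * attribute, the outputs (position in a GRF, the rest as message
+ * registers), program temporaries, address registers, outputs that
+ * are also read as sources, the stack, and finally scratch
+ * temporaries.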
+ */ +static void brw_vs_alloc_regs( struct brw_vs_compile *c, +                               struct brw_prog_info *info ) +{ +   unsigned i, reg = 0, mrf; +   unsigned nr_params; + +   /* r0 -- reserved as usual +    */ +   c->r0 = brw_vec8_grf(reg, 0); reg++; + +   /* User clip planes from curbe: +    */ +   if (c->key.nr_userclip) { +      for (i = 0; i < c->key.nr_userclip; i++) { +	 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); +      } + +      /* Deal with curbe alignment: +       */ +      reg += ((6+c->key.nr_userclip+3)/4)*2; +   } + +   /* Vertex program parameters from curbe: +    */ +   nr_params = c->prog_data.max_const; +   for (i = 0; i < nr_params; i++) { +      c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); +   } +   reg += (nr_params+1)/2; +   c->prog_data.curb_read_length = reg - 1; + + + +   /* Allocate input regs: +    */ +   c->nr_inputs = c->vp->info.num_inputs; +   for (i = 0; i < c->nr_inputs; i++) { +	 c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); +	 reg++; +   } + + +   /* Allocate outputs: TODO: could organize the non-position outputs +    * to go straight into message regs. +    */ +   c->nr_outputs = 0; +   c->first_output = reg; +   mrf = 4; +   for (i = 0; i < c->vp->info.num_outputs; i++) { +      c->nr_outputs++; +#if 0 +      if (i == VERT_RESULT_HPOS) { +         c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); +         reg++; +      } +      else if (i == VERT_RESULT_PSIZ) { +         c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); +         reg++; +         mrf++;		/* just a placeholder?  XXX fix later stages & remove this */ +      } +      else { +         c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); +         mrf++; +      } +#else +      /*treat pos differently for now */ +      if (i == info->pos_idx) { +         c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); +         reg++; +      } else { +         c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); +         mrf++; +      } +#endif +   } + +   /* Allocate program temporaries: +    */ +   for (i = 0; i < info->num_temps; i++) { +      c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); +      reg++; +   } + +   /* Address reg(s).  Don't try to use the internal address reg until +    * deref time. +    */ +   for (i = 0; i < info->num_addrs; i++) { +      c->regs[TGSI_FILE_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE, +                                               reg, +                                               0, +                                               BRW_REGISTER_TYPE_D, +                                               BRW_VERTICAL_STRIDE_8, +                                               BRW_WIDTH_8, +                                               BRW_HORIZONTAL_STRIDE_1, +                                               BRW_SWIZZLE_XXXX, +                                               TGSI_WRITEMASK_X); +      reg++; +   } + +   for (i = 0; i < 128; i++) { +      if (c->output_regs[i].used_in_src) { +         c->output_regs[i].reg = brw_vec8_grf(reg, 0); +         reg++; +      } +   } + +   c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); +   reg += 2; + + +   /* Some opcodes need an internal temporary: +    */ +   c->first_tmp = reg; +   c->last_tmp = reg;		/* for allocation purposes */ + +   /* Each input reg holds data from two vertices.  
The +    * urb_read_length is the number of registers read from *each* +    * vertex urb, so is half the amount: +    */ +   c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + +   c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; +   c->prog_data.total_grf = reg; +} + + +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ +   struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + +   if (++c->last_tmp > c->prog_data.total_grf) +      c->prog_data.total_grf = c->last_tmp; + +   return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ +   if (tmp.nr == c->last_tmp-1) +      c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ +   c->last_tmp = c->first_tmp; +} + + +static void unalias1( struct brw_vs_compile *c, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      void (*func)( struct brw_vs_compile *, +				    struct brw_reg, +				    struct brw_reg )) +{ +   if (dst.file == arg0.file && dst.nr == arg0.nr) { +      struct brw_compile *p = &c->func; +      struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); +      func(c, tmp, arg0); +      brw_MOV(p, dst, tmp); +   } +   else { +      func(c, dst, arg0); +   } +} + +static void unalias2( struct brw_vs_compile *c, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      struct brw_reg arg1, +		      void (*func)( struct brw_vs_compile *, +				    struct brw_reg, +				    struct brw_reg, +				    struct brw_reg )) +{ +   if ((dst.file == arg0.file && dst.nr == arg0.nr) || +       (dst.file == arg1.file && dst.nr == arg1.nr)) { +      struct brw_compile *p = &c->func; +      struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); +      func(c, tmp, arg0, arg1); +      brw_MOV(p, dst, tmp); +   } +   else { +      func(c, dst, arg0, arg1); +   } +} + +static void emit_sop( struct brw_compile *p, +                      struct brw_reg dst, +                      struct brw_reg arg0, +                      struct brw_reg arg1, +		      unsigned cond) +{ +   brw_push_insn_state(p); +   brw_CMP(p, brw_null_reg(), cond, arg0, arg1); +   brw_set_predicate_control(p, BRW_PREDICATE_NONE); +   brw_MOV(p, dst, brw_imm_f(1.0f)); +   brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); +   brw_MOV(p, dst, brw_imm_f(0.0f)); +   brw_pop_insn_state(p); +} + +static void emit_seq( struct brw_compile *p, +                      struct brw_reg dst, +                      struct brw_reg arg0, +                      struct brw_reg arg1 ) +{ +   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} + +static void emit_sne( struct brw_compile *p, +                      struct brw_reg dst, +                      struct brw_reg arg0, +                      struct brw_reg arg1 ) +{ +   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} +static void emit_slt( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      struct brw_reg arg1 ) +{ +   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} + +static void emit_sle( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      struct brw_reg arg1 ) +{ +   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); +} + +static void emit_sgt( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      struct brw_reg arg1 ) +{ +   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} + +static void emit_sge( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      struct 
brw_reg arg1 ) +{ +  emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); +} + +static void emit_max( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      struct brw_reg arg1 ) +{ +   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); +   brw_SEL(p, dst, arg1, arg0); +   brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void emit_min( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg arg0, +		      struct brw_reg arg1 ) +{ +   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); +   brw_SEL(p, dst, arg0, arg1); +   brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +static void emit_math1( struct brw_vs_compile *c, +			unsigned function, +			struct brw_reg dst, +			struct brw_reg arg0, +			unsigned precision) +{ +   /* There are various odd behaviours with SEND on the simulator.  In +    * addition there are documented issues with the fact that the GEN4 +    * processor doesn't do dependency control properly on SEND +    * results.  So, on balance, this kludge to get around failures +    * with writemasked math results looks like it might be necessary +    * whether that turns out to be a simulator bug or not: +    */ +   struct brw_compile *p = &c->func; +   struct brw_reg tmp = dst; +   boolean need_tmp = (dst.dw1.bits.writemask != 0xf || +			 dst.file != BRW_GENERAL_REGISTER_FILE); + +   if (need_tmp) +      tmp = get_tmp(c); + +   brw_math(p, +	    tmp, +	    function, +	    BRW_MATH_SATURATE_NONE, +	    2, +	    arg0, +	    BRW_MATH_DATA_SCALAR, +	    precision); + +   if (need_tmp) { +      brw_MOV(p, dst, tmp); +      release_tmp(c, tmp); +   } +} + +static void emit_math2( struct brw_vs_compile *c, +			unsigned function, +			struct brw_reg dst, +			struct brw_reg arg0, +			struct brw_reg arg1, +			unsigned precision) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg tmp = dst; +   boolean need_tmp = (dst.dw1.bits.writemask != 0xf || +			 dst.file != BRW_GENERAL_REGISTER_FILE); + +   if (need_tmp) +      tmp = get_tmp(c); + +   brw_MOV(p, brw_message_reg(3), arg1); + +   brw_math(p, +	    tmp, +	    function, +	    BRW_MATH_SATURATE_NONE, +	    2, + 	    arg0, +	    BRW_MATH_DATA_SCALAR, +	    precision); + +   if (need_tmp) { +      brw_MOV(p, dst, tmp); +      release_tmp(c, tmp); +   } +} + + + +static void emit_exp_noalias( struct brw_vs_compile *c, +			      struct brw_reg dst, +			      struct brw_reg arg0 ) +{ +   struct brw_compile *p = &c->func; + + +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) { +      struct brw_reg tmp = get_tmp(c); +      struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + +      /* tmp_d = floor(arg0.x) */ +      brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); + +      /* result[0] = 2.0 ^ tmp */ + +      /* Adjust exponent for floating point: +       * exp += 127 +       */ +      brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127)); + +      /* Install exponent and sign. 
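+       * (For example, with arg0.x = 2.5 the RNDD above gives tmp_d = 2;
+       * adding the bias yields 129, and 129 << 23 is 0x40800000, the
+       * IEEE-754 single-precision bit pattern of 4.0f = 2^2.)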
+       * Excess drops off the edge: +       */ +      brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X), +	      tmp_d, brw_imm_d(23)); + +      release_tmp(c, tmp); +   } + +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) { +      /* result[1] = arg0.x - floor(arg0.x) */ +      brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0)); +   } + +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { +      /* As with the LOG instruction, we might be better off just +       * doing a taylor expansion here, seeing as we have to do all +       * the prep work. +       * +       * If mathbox partial precision is too low, consider also: +       * result[3] = result[0] * EXP(result[1]) +       */ +      emit_math1(c, +		 BRW_MATH_FUNCTION_EXP, +		 brw_writemask(dst, TGSI_WRITEMASK_Z), +		 brw_swizzle1(arg0, 0), +		 BRW_MATH_PRECISION_PARTIAL); +   } + +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { +      /* result[3] = 1.0; */ +      brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1)); +   } +} + + +static void emit_log_noalias( struct brw_vs_compile *c, +			      struct brw_reg dst, +			      struct brw_reg arg0 ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg tmp = dst; +   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); +   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); +   boolean need_tmp = (dst.dw1.bits.writemask != 0xf || +			 dst.file != BRW_GENERAL_REGISTER_FILE); + +   if (need_tmp) { +      tmp = get_tmp(c); +      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); +   } + +   /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt +    * according to spec: +    * +    * These almost look likey they could be joined up, but not really +    * practical: +    * +    * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 +    * result[1].i = (x.i & ((1<<23)-1)        + (127<<23) +    */ +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) { +      brw_AND(p, +	      brw_writemask(tmp_ud, TGSI_WRITEMASK_X), +	      brw_swizzle1(arg0_ud, 0), +	      brw_imm_ud((1U<<31)-1)); + +      brw_SHR(p, +	      brw_writemask(tmp_ud, TGSI_WRITEMASK_X), +	      tmp_ud, +	      brw_imm_ud(23)); + +      brw_ADD(p, +	      brw_writemask(tmp, TGSI_WRITEMASK_X), +	      retype(tmp_ud, BRW_REGISTER_TYPE_D),	/* does it matter? */ +	      brw_imm_d(-127)); +   } + +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) { +      brw_AND(p, +	      brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), +	      brw_swizzle1(arg0_ud, 0), +	      brw_imm_ud((1<<23)-1)); + +      brw_OR(p, +	     brw_writemask(tmp_ud, TGSI_WRITEMASK_Y), +	     tmp_ud, +	     brw_imm_ud(127<<23)); +   } + +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { +      /* result[2] = result[0] + LOG2(result[1]); */ + +      /* Why bother?  The above is just a hint how to do this with a +       * taylor series.  Maybe we *should* use a taylor series as by +       * the time all the above has been done it's almost certainly +       * quicker than calling the mathbox, even with low precision. 
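+       * (Worked example of the split above: x = 12.0f is 0x41400000, so
+       * the exponent field is 0x82 and result[0] = 0x82 - 127 = 3, while
+       * re-biasing the mantissa gives result[1] = 1.5f; log2(12) is then
+       * 3 + log2(1.5), about 3.585.)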
+       * +       * Options are: +       *    - result[0] + mathbox.LOG2(result[1]) +       *    - mathbox.LOG2(arg0.x) +       *    - result[0] + inline_taylor_approx(result[1]) +       */ +      emit_math1(c, +		 BRW_MATH_FUNCTION_LOG, +		 brw_writemask(tmp, TGSI_WRITEMASK_Z), +		 brw_swizzle1(tmp, 1), +		 BRW_MATH_PRECISION_FULL); + +      brw_ADD(p, +	      brw_writemask(tmp, TGSI_WRITEMASK_Z), +	      brw_swizzle1(tmp, 2), +	      brw_swizzle1(tmp, 0)); +   } + +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { +      /* result[3] = 1.0; */ +      brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1)); +   } + +   if (need_tmp) { +      brw_MOV(p, dst, tmp); +      release_tmp(c, tmp); +   } +} + + + + +/* Need to unalias - consider swizzles:   r0 = DST r0.xxxx r1 + */ +static void emit_dst_noalias( struct brw_vs_compile *c, +			      struct brw_reg dst, +			      struct brw_reg arg0, +			      struct brw_reg arg1) +{ +   struct brw_compile *p = &c->func; + +   /* There must be a better way to do this: +    */ +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) +      brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0)); +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) +      brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1); +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) +      brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0); +   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) +      brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1); +} + +static void emit_xpd( struct brw_compile *p, +		      struct brw_reg dst, +		      struct brw_reg t, +		      struct brw_reg u) +{ +   brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3),  brw_swizzle(u,2,0,1,3)); +   brw_MAC(p, dst,     negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); +} + + + +static void emit_lit_noalias( struct brw_vs_compile *c, +			      struct brw_reg dst, +			      struct brw_reg arg0 ) +{ +   struct brw_compile *p = &c->func; +   struct brw_instruction *if_insn; +   struct brw_reg tmp = dst; +   boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + +   if (need_tmp) +      tmp = get_tmp(c); + +   brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0)); +   brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1)); + +   /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order +    * to get all channels active inside the IF.  In the clipping code +    * we run with NoMask, so it's not an option and we can use +    * BRW_EXECUTE_1 for all comparisions. +    */ +   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); +   if_insn = brw_IF(p, BRW_EXECUTE_8); +   { +      brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0)); + +      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); +      brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z),  brw_swizzle1(arg0,1)); +      brw_set_predicate_control(p, BRW_PREDICATE_NONE); + +      emit_math2(c, +		 BRW_MATH_FUNCTION_POW, +		 brw_writemask(dst, TGSI_WRITEMASK_Z), +		 brw_swizzle1(tmp, 2), +		 brw_swizzle1(arg0, 3), +		 BRW_MATH_PRECISION_PARTIAL); +   } + +   brw_ENDIF(p, if_insn); +} + + + + + +/* TODO: relative addressing! 
+ */ +static struct brw_reg get_reg( struct brw_vs_compile *c, +			       unsigned file, +			       unsigned index ) +{ +   switch (file) { +   case TGSI_FILE_TEMPORARY: +   case TGSI_FILE_INPUT: +   case TGSI_FILE_OUTPUT: +      assert(c->regs[file][index].nr != 0); +      return c->regs[file][index]; +   case TGSI_FILE_CONSTANT: +      assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0); +      return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm]; +   case TGSI_FILE_IMMEDIATE: +      assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); +      return c->regs[TGSI_FILE_CONSTANT][index]; +   case TGSI_FILE_ADDRESS: +      assert(index == 0); +      return c->regs[file][index]; + +   case TGSI_FILE_NULL:			/* undef values */ +      return brw_null_reg(); + +   default: +      assert(0); +      return brw_null_reg(); +   } +} + + + +static struct brw_reg deref( struct brw_vs_compile *c, +			     struct brw_reg arg, +			     int offset) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg tmp = vec4(get_tmp(c)); +   struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); +   unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16; +   struct brw_reg indirect = brw_vec4_indirect(0,0); + +   { +      brw_push_insn_state(p); +      brw_set_access_mode(p, BRW_ALIGN_1); + +      /* This is pretty clunky - load the address register twice and +       * fetch each 4-dword value in turn.  There must be a way to do +       * this in a single pass, but I couldn't get it to work. +       */ +      brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); +      brw_MOV(p, tmp, indirect); + +      brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); +      brw_MOV(p, suboffset(tmp, 4), indirect); + +      brw_pop_insn_state(p); +   } + +   return vec8(tmp); +} + + +static void emit_arl( struct brw_vs_compile *c, +		      struct brw_reg dst, +		      struct brw_reg arg0 ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg tmp = dst; +   boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + +   if (need_tmp) +      tmp = get_tmp(c); + +   brw_RNDD(p, tmp, arg0); +   brw_MUL(p, dst, tmp, brw_imm_d(16)); + +   if (need_tmp) +      release_tmp(c, tmp); +} + + +/* Will return mangled results for SWZ op.  The emit_swz() function + * ignores this result and recalculates taking extended swizzles into + * account. + */ +static struct brw_reg get_arg( struct brw_vs_compile *c, +			       struct tgsi_src_register *src ) +{ +   struct brw_reg reg; + +   if (src->File == TGSI_FILE_NULL) +      return brw_null_reg(); + +#if 0 +   if (src->RelAddr) +      reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); +   else +#endif +      reg = get_reg(c, src->File, src->Index); + +   /* Convert 3-bit swizzle to 2-bit. +    */ +   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX, +				       src->SwizzleY, +				       src->SwizzleZ, +				       src->SwizzleW); + +   /* Note this is ok for non-swizzle instructions: +    */ +   reg.negate = src->Negate ? 
1 : 0;
+
+   return reg;
+}
+
+
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+                               const struct tgsi_dst_register *dst )
+{
+   struct brw_reg reg = get_reg(c, dst->File, dst->Index);
+
+   reg.dw1.bits.writemask = dst->WriteMask;
+
+   return reg;
+}
+
+
+static void emit_swz( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct tgsi_src_register src )
+{
+   struct brw_compile *p = &c->func;
+   unsigned zeros_mask = 0;
+   unsigned ones_mask = 0;
+   unsigned src_mask = 0;
+   ubyte src_swz[4];
+   boolean need_tmp = (src.Negate &&
+                       dst.file != BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg tmp = dst;
+   unsigned i;
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   for (i = 0; i < 4; i++) {
+      if (dst.dw1.bits.writemask & (1<<i)) {
+         ubyte s = 0;
+         switch (i) {
+         case 0:
+            s = src.SwizzleX;
+            break;
+         case 1:
+            s = src.SwizzleY;
+            break;
+         case 2:
+            s = src.SwizzleZ;
+            break;
+         case 3:
+            s = src.SwizzleW;
+            break;
+         }
+         switch (s) {
+         case TGSI_SWIZZLE_X:
+         case TGSI_SWIZZLE_Y:
+         case TGSI_SWIZZLE_Z:
+         case TGSI_SWIZZLE_W:
+            src_mask |= 1<<i;
+            src_swz[i] = s;
+            break;
+         case TGSI_EXTSWIZZLE_ZERO:
+            zeros_mask |= 1<<i;
+            break;
+         case TGSI_EXTSWIZZLE_ONE:
+            ones_mask |= 1<<i;
+            break;
+         }
+      }
+   }
+
+   /* Do src first, in case dst aliases src:
+    */
+   if (src_mask) {
+      struct brw_reg arg0;
+
+#if 0
+      if (src.RelAddr)
+         arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
+      else
+#endif
+         arg0 = get_reg(c, src.File, src.Index);
+
+      arg0 = brw_swizzle(arg0,
+                         src_swz[0], src_swz[1],
+                         src_swz[2], src_swz[3]);
+
+      brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
+   }
+
+   if (zeros_mask)
+      brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
+
+   if (ones_mask)
+      brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
+
+   if (src.Negate)
+      brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+/* Post-vertex-program processing.  Send the results to the URB.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg m0 = brw_message_reg(0);
+   struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
+   struct brw_reg ndc;
+
+   if (c->key.copy_edgeflag) {
+      brw_MOV(p,
+              get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
+              get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
+   }
+
+
+   /* Build ndc coords?   TODO: Short-circuit when w is known to be one.
+    */ +   if (!c->key.know_w_is_one) { +      ndc = get_tmp(c); +      emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); +      brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc); +   } +   else { +      ndc = pos; +   } + +   /* This includes the workaround for -ve rhw, so is no longer an +    * optional step: +    */ +   if (info->writes_psize || +       c->key.nr_userclip || +       !c->key.know_w_is_one) +   { +      struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); +      unsigned i; + +      brw_MOV(p, header1, brw_imm_ud(0)); + +      brw_set_access_mode(p, BRW_ALIGN_16); + +      if (info->writes_psize) { +	 struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx]; +	 brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W), +                 brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); +	 brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, +                 brw_imm_ud(0x7ff<<8)); +      } + + +      for (i = 0; i < c->key.nr_userclip; i++) { +	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L); +	 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); +	 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i)); +	 brw_set_predicate_control(p, BRW_PREDICATE_NONE); +      } + + +      /* i965 clipping workaround: +       * 1) Test for -ve rhw +       * 2) If set, +       *      set ndc = (0,0,0,0) +       *      set ucp[6] = 1 +       * +       * Later, clipping will detect ucp[6] and ensure the primitive is +       * clipped against all fixed planes. +       */ +      if (!c->key.know_w_is_one) { +	 brw_CMP(p, +		 vec8(brw_null_reg()), +		 BRW_CONDITIONAL_L, +		 brw_swizzle1(ndc, 3), +		 brw_imm_f(0)); + +	 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6)); +	 brw_MOV(p, ndc, brw_imm_f(0)); +	 brw_set_predicate_control(p, BRW_PREDICATE_NONE); +      } + +      brw_set_access_mode(p, BRW_ALIGN_1);	/* why? 
*/ +      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); +      brw_set_access_mode(p, BRW_ALIGN_16); + +      release_tmp(c, header1); +   } +   else { +      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); +   } + + +   /* Emit the (interleaved) headers for the two vertices - an 8-reg +    * of zeros followed by two sets of NDC coordinates: +    */ +   brw_set_access_mode(p, BRW_ALIGN_1); +   brw_MOV(p, offset(m0, 2), ndc); +   brw_MOV(p, offset(m0, 3), pos); + + +   brw_urb_WRITE(p, +		 brw_null_reg(), /* dest */ +		 0,		/* starting mrf reg nr */ +		 c->r0,		/* src */ +		 0,		/* allocate */ +		 1,		/* used */ +		 c->nr_outputs + 3, /* msg len */ +		 0,		/* response len */ +		 1, 		/* eot */ +		 1, 		/* writes complete */ +		 0, 		/* urb destination offset */ +		 BRW_URB_SWIZZLE_INTERLEAVE); + +} + +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ +   struct tgsi_parse_context parse; +   const struct tgsi_token *tokens = c->vp->program.tokens; +   tgsi_parse_init(&parse, tokens); +   while (!tgsi_parse_end_of_tokens(&parse)) { +      tgsi_parse_token(&parse); +      if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { +#if 0 +         struct brw_instruction *brw_inst1, *brw_inst2; +         const struct tgsi_full_instruction *inst1, *inst2; +         int offset; +         inst1 = &parse.FullToken.FullInstruction; +         brw_inst1 = inst1->Data; +         switch (inst1->Opcode) { +	 case TGSI_OPCODE_CAL: +	 case TGSI_OPCODE_BRA: +	    target_insn = inst1->BranchTarget; +	    inst2 = &c->vp->program.Base.Instructions[target_insn]; +	    brw_inst2 = inst2->Data; +	    offset = brw_inst2 - brw_inst1; +	    brw_set_src1(brw_inst1, brw_imm_d(offset*16)); +	    break; +	 case TGSI_OPCODE_END: +	    offset = end_inst - brw_inst1; +	    brw_set_src1(brw_inst1, brw_imm_d(offset*16)); +	    break; +	 default: +	    break; +         } +#endif +      } +   } +   tgsi_parse_free(&parse); +} + +static void process_declaration(const struct tgsi_full_declaration *decl, +                                struct brw_prog_info *info) +{ +   int first = decl->DeclarationRange.First; +   int last = decl->DeclarationRange.Last; +    +   switch(decl->Declaration.File) { +   case TGSI_FILE_CONSTANT:  +      info->num_consts += last - first + 1; +      break; +   case TGSI_FILE_INPUT: { +   } +      break; +   case TGSI_FILE_OUTPUT: { +      assert(last == first);	/* for now */ +      if (decl->Declaration.Semantic) { +         switch (decl->Semantic.SemanticName) { +         case TGSI_SEMANTIC_POSITION: { +            info->pos_idx = first; +         } +            break; +         case TGSI_SEMANTIC_COLOR: +            break; +         case TGSI_SEMANTIC_BCOLOR: +            break; +         case TGSI_SEMANTIC_FOG: +            break; +         case TGSI_SEMANTIC_PSIZE: { +            info->writes_psize = TRUE; +            info->psize_idx = first; +         } +            break; +         case TGSI_SEMANTIC_GENERIC: +            break; +         } +      } +   } +      break; +   case TGSI_FILE_TEMPORARY: { +      info->num_temps += (last - first) + 1; +   } +      break; +   case TGSI_FILE_SAMPLER: { +   } +      break; +   case TGSI_FILE_ADDRESS: { +      info->num_addrs += (last - first) + 1; +   } +      break; +   case TGSI_FILE_IMMEDIATE: { +   } +      break; +   case TGSI_FILE_NULL: { +   } +      break; +   } +} + +static void process_instruction(struct brw_vs_compile *c, +                       
         struct tgsi_full_instruction *inst, +                                struct brw_prog_info *info) +{ +   struct brw_reg args[3], dst; +   struct brw_compile *p = &c->func; +   /*struct brw_indirect stack_index = brw_indirect(0, 0);*/ +   unsigned i; +   unsigned index; +   unsigned file; +   /*FIXME: might not be the only one*/ +   const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister; +   /* +   struct brw_instruction *if_inst[MAX_IFSN]; +   unsigned insn, if_insn = 0; +   */ + +   for (i = 0; i < 3; i++) { +      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; +      index = src->SrcRegister.Index; +      file = src->SrcRegister.File; +      if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) +         args[i] = c->output_regs[index].reg; +      else +         args[i] = get_arg(c, &src->SrcRegister); +   } + +   /* Get dest regs.  Note that it is possible for a reg to be both +    * dst and arg, given the static allocation of registers.  So +    * care needs to be taken emitting multi-operation instructions. +    */ +   index = dst_reg->Index; +   file = dst_reg->File; +   if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) +      dst = c->output_regs[index].reg; +   else +      dst = get_dst(c, dst_reg); + +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_ABS: +      brw_MOV(p, dst, brw_abs(args[0])); +      break; +   case TGSI_OPCODE_ADD: +      brw_ADD(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_DP3: +      brw_DP3(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_DP4: +      brw_DP4(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_DPH: +      brw_DPH(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_DST: +      unalias2(c, dst, args[0], args[1], emit_dst_noalias); +      break; +   case TGSI_OPCODE_EXP: +      unalias1(c, dst, args[0], emit_exp_noalias); +      break; +   case TGSI_OPCODE_EX2: +      emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); +      break; +   case TGSI_OPCODE_ARL: +      emit_arl(c, dst, args[0]); +      break; +   case TGSI_OPCODE_FLR: +      brw_RNDD(p, dst, args[0]); +      break; +   case TGSI_OPCODE_FRC: +      brw_FRC(p, dst, args[0]); +      break; +   case TGSI_OPCODE_LOG: +      unalias1(c, dst, args[0], emit_log_noalias); +      break; +   case TGSI_OPCODE_LG2: +      emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); +      break; +   case TGSI_OPCODE_LIT: +      unalias1(c, dst, args[0], emit_lit_noalias); +      break; +   case TGSI_OPCODE_MAD: +      brw_MOV(p, brw_acc_reg(), args[2]); +      brw_MAC(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_MAX: +      emit_max(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_MIN: +      emit_min(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_MOV: +   case TGSI_OPCODE_SWZ: +#if 0 +      /* The args[0] value can't be used here as it won't have +       * correctly encoded the full swizzle: +       */ +      emit_swz(c, dst, inst->SrcReg[0] ); +#endif +      brw_MOV(p, dst, args[0]); +      break; +   case TGSI_OPCODE_MUL: +      brw_MUL(p, dst, args[0], args[1]); +      break; +   case TGSI_OPCODE_POW: +      emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); +      break; +   case TGSI_OPCODE_RCP: +      emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); +      break; +   case TGSI_OPCODE_RSQ: +      
emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      emit_seq(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SNE:
+      emit_sne(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SGE:
+      emit_sge(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SGT:
+      emit_sgt(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SLT:
+      emit_slt(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SLE:
+      emit_sle(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SUB:
+      brw_ADD(p, dst, args[0], negate(args[1]));
+      break;
+   case TGSI_OPCODE_XPD:
+      emit_xpd(p, dst, args[0], args[1]);
+      break;
+#if 0
+   case TGSI_OPCODE_IF:
+      assert(if_insn < MAX_IFSN);
+      if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+      break;
+   case TGSI_OPCODE_ELSE:
+      if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+      break;
+   case TGSI_OPCODE_ENDIF:
+      assert(if_insn > 0);
+      brw_ENDIF(p, if_inst[--if_insn]);
+      break;
+   case TGSI_OPCODE_BRA:
+      brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      brw_set_predicate_control_flag_value(p, 0xff);
+      break;
+   case TGSI_OPCODE_CAL:
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_ADD(p, get_addr_reg(stack_index),
+              get_addr_reg(stack_index), brw_imm_d(4));
+      inst->Data = &p->store[p->nr_insn];
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      break;
+#endif
+   case TGSI_OPCODE_RET:
+#if 0
+      brw_ADD(p, get_addr_reg(stack_index),
+              get_addr_reg(stack_index), brw_imm_d(-4));
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+#else
+      /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/
+#endif
+      break;
+   case TGSI_OPCODE_END:
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      break;
+   case TGSI_OPCODE_BGNSUB:
+   case TGSI_OPCODE_ENDSUB:
+      break;
+   default:
+      debug_printf("Unsupported opcode %d in vertex shader\n", inst->Instruction.Opcode);
+      break;
+   }
+
+   if (dst_reg->File == TGSI_FILE_OUTPUT
+       && dst_reg->Index != info->pos_idx
+       && c->output_regs[dst_reg->Index].used_in_src)
+      brw_MOV(p, get_dst(c, dst_reg), dst);
+
+   release_tmps(c);
+}
+
+/* Emit the vertex program instructions here.
+ */ +void brw_vs_emit(struct brw_vs_compile *c) +{ +#define MAX_IFSN 32 +   struct brw_compile *p = &c->func; +   struct brw_instruction *end_inst; +   struct tgsi_parse_context parse; +   struct brw_indirect stack_index = brw_indirect(0, 0); +   const struct tgsi_token *tokens = c->vp->program.tokens; +   struct brw_prog_info prog_info; +   unsigned allocated_registers = 0; +   memset(&prog_info, 0, sizeof(struct brw_prog_info)); + +   brw_set_compression_control(p, BRW_COMPRESSION_NONE); +   brw_set_access_mode(p, BRW_ALIGN_16); + +   tgsi_parse_init(&parse, tokens); +   /* Message registers can't be read, so copy the output into GRF register +      if they are used in source registers */ +   while (!tgsi_parse_end_of_tokens(&parse)) { +      tgsi_parse_token(&parse); +      unsigned i; +      switch (parse.FullToken.Token.Type) { +      case TGSI_TOKEN_TYPE_INSTRUCTION: { +         const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; +         for (i = 0; i < 3; ++i) { +            const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister; +            unsigned index = src->Index; +            unsigned file = src->File; +            if (file == TGSI_FILE_OUTPUT) +               c->output_regs[index].used_in_src = TRUE; +         } +      } +         break; +      default: +         /* nothing */ +         break; +      } +   } +   tgsi_parse_free(&parse); + +   tgsi_parse_init(&parse, tokens); + +   while (!tgsi_parse_end_of_tokens(&parse)) { +      tgsi_parse_token(&parse); + +      switch (parse.FullToken.Token.Type) { +      case TGSI_TOKEN_TYPE_DECLARATION: { +         struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; +         process_declaration(decl, &prog_info); +      } +         break; +      case TGSI_TOKEN_TYPE_IMMEDIATE: { +         struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; +         assert(imm->Immediate.NrTokens == 4 + 1); +         c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float; +         c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float; +         c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float; +         c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float; +         c->prog_data.num_imm++; +      } +         break; +      case TGSI_TOKEN_TYPE_INSTRUCTION: { +         struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; +         if (!allocated_registers) { +            /* first instruction (declerations finished). 
+             * now that we know what vars are being used allocate +             * registers for them.*/ +            c->prog_data.num_consts = prog_info.num_consts; +            c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm; +            brw_vs_alloc_regs(c, &prog_info); + +	    brw_set_access_mode(p, BRW_ALIGN_1); +            brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); +	    brw_set_access_mode(p, BRW_ALIGN_16); +            allocated_registers = 1; +         } +         process_instruction(c, inst, &prog_info); +      } +         break; +      } +   } + +   end_inst = &p->store[p->nr_insn]; +   emit_vertex_write(c, &prog_info); +   post_vs_emit(c, end_inst); +   tgsi_parse_free(&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c new file mode 100644 index 0000000000..1eaff87892 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -0,0 +1,103 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +static void upload_vs_unit( struct brw_context *brw ) +{ +   struct brw_vs_unit_state vs; + +   memset(&vs, 0, sizeof(vs)); + +   /* CACHE_NEW_VS_PROG */ +   vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; +   vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; +   vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; +   vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; +   vs.thread3.dispatch_grf_start_reg = 1; + + +   /* BRW_NEW_URB_FENCE  */ +   vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; +   vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; +   vs.thread4.max_threads = MIN2( +      MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), +      15); + + + +   if (BRW_DEBUG & DEBUG_SINGLE_THREAD) +      vs.thread4.max_threads = 0; + +   /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ +   if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) { +      /* Note that we read in the userclip planes as well, hence +       * clip_start: +       */ +      vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; +   } +   else { +      vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; +   } + +   vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; +   vs.thread3.urb_entry_read_offset = 0; + +   /* No samplers for ARB_vp programs: +    */ +   vs.vs5.sampler_count = 0; + +   if (BRW_DEBUG & DEBUG_STATS) +      vs.thread4.stats_enable = 1; + +   /* Vertex program always enabled: +    */ +   vs.vs6.vs_enable = 1; + +   brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); +} + + +const struct brw_tracked_state brw_vs_unit = { +   .dirty = { +      .brw   = (BRW_NEW_CLIP | +		BRW_NEW_CURBE_OFFSETS | +		BRW_NEW_URB_FENCE), +      .cache = CACHE_NEW_VS_PROG +   }, +   .update = upload_vs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h new file mode 100644 index 0000000000..ec1e400418 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i965simple requires any window system + * hosting it to implement.  This is the only include file in i965simple + * which is public. + * + */ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + + +#include "pipe/p_defines.h" + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system.  The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct pipe_buffer; +struct pipe_fence_handle; +struct pipe_winsys; +struct pipe_screen; + + +/* The pipe driver currently understands the following chipsets: + */ +#define PCI_CHIP_I965_G			0x29A2 +#define PCI_CHIP_I965_Q			0x2992 +#define PCI_CHIP_I965_G_1		0x2982 +#define PCI_CHIP_I965_GM                0x2A02 +#define PCI_CHIP_I965_GME               0x2A12 + + +/* These are the names of all the state caches managed by the driver. + *  + * When data is uploaded to a buffer with buffer_subdata, we use the + * special version of that function below so that information about + * what type of data this is can be passed to the winsys backend. + * That in turn allows the correct flags to be set in the aub file + * dump to allow human-readable file dumps later on. + */ + +enum brw_cache_id { +   BRW_CC_VP, +   BRW_CC_UNIT, +   BRW_WM_PROG, +   BRW_SAMPLER_DEFAULT_COLOR, +   BRW_SAMPLER, +   BRW_WM_UNIT, +   BRW_SF_PROG, +   BRW_SF_VP, +   BRW_SF_UNIT, +   BRW_VS_UNIT, +   BRW_VS_PROG, +   BRW_GS_UNIT, +   BRW_GS_PROG, +   BRW_CLIP_VP, +   BRW_CLIP_UNIT, +   BRW_CLIP_PROG, +   BRW_SS_SURFACE, +   BRW_SS_SURF_BIND, + +   BRW_MAX_CACHE +}; + +#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE + +/** + * Additional winsys interface for i965simple. + * + * It is an over-simple batchbuffer mechanism.  Will want to improve the + * performance of this, perhaps based on the cmdstream stuff.  It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct brw_winsys { + +   void (*destroy)(struct brw_winsys *); +    +   /** +    * Reserve space on batch buffer. +    * +    * Returns a null pointer if there is insufficient space in the batch buffer +    * to hold the requested number of dwords and relocations. +    * +    * The number of dwords should also include the number of relocations. +    */ +   unsigned *(*batch_start)(struct brw_winsys *sws, +                            unsigned dwords, +                            unsigned relocs); + +   void (*batch_dword)(struct brw_winsys *sws, +                       unsigned dword); + +   /** +    * Emit a relocation to a buffer. +    * +    * Used not only when the buffer addresses are not pinned, but also to +    * ensure refered buffers will not be destroyed until the current batch +    * buffer execution is finished. +    * +    * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and +    * I915_BUFFER_ACCESS_READ macros. 
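+    * (In this header the corresponding flags are the BRW_BUFFER_ACCESS_WRITE
+    * and BRW_BUFFER_ACCESS_READ defines near the end of the file.)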
+    */ +   void (*batch_reloc)(struct brw_winsys *sws, +                       struct pipe_buffer *buf, +                       unsigned access_flags, +                       unsigned delta); + + +   /* Not used yet, but really want this: +    */ +   void (*batch_end)( struct brw_winsys *sws ); + +   /** +    * Flush the batch buffer. +    * +    * Fence argument must point to NULL or to a previous fence, and the caller +    * must call fence_reference when done with the fence. +    */ +   void (*batch_flush)(struct brw_winsys *sws, +                       struct pipe_fence_handle **fence); + + +   /* A version of buffer_subdata that includes information for the +    * simulator: +    */ +   void (*buffer_subdata_typed)(struct brw_winsys *sws,  +				struct pipe_buffer *buf, +				unsigned long offset,  +				unsigned long size,  +				const void *data, +				unsigned data_type); +    + +   /* A cheat so we don't have to think about relocations in a couple +    * of places yet: +    */ +   unsigned (*get_buffer_offset)( struct brw_winsys *sws, +				  struct pipe_buffer *buf, +				  unsigned flags ); + +}; + +#define BRW_BUFFER_ACCESS_WRITE   0x1 +#define BRW_BUFFER_ACCESS_READ    0x2 + +#define BRW_BUFFER_USAGE_LIT_VERTEX  (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *brw_create(struct pipe_screen *, +                                struct brw_winsys *, +                                unsigned pci_id); + +static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys, +                                           const void *data, +                                           unsigned bytes) +{ +   static const unsigned incr = sizeof(unsigned); +   uint i; +   const unsigned *udata = (const unsigned*)(data); +   unsigned size = bytes/incr; + +   winsys->batch_start(winsys, size, 0); +   for (i = 0; i < size; ++i) { +      winsys->batch_dword(winsys, udata[i]); +   } +   winsys->batch_end(winsys); + +   return (i == size); +} +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c new file mode 100644 index 0000000000..10161f2d2f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -0,0 +1,209 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_eu.h" +#include "brw_state.h" +#include "util/u_memory.h" + + + +static void do_wm_prog( struct brw_context *brw, +			struct brw_fragment_program *fp, +			struct brw_wm_prog_key *key) +{ +   struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); +   const unsigned *program; +   unsigned program_size; + +   c->key = *key; +   c->fp = fp; +    +   c->delta_xy[0] = brw_null_reg(); +   c->delta_xy[1] = brw_null_reg(); +   c->pixel_xy[0] = brw_null_reg(); +   c->pixel_xy[1] = brw_null_reg(); +   c->pixel_w = brw_null_reg(); + + +   debug_printf("XXXXXXXX FP\n"); +    +   brw_wm_glsl_emit(c); + +   /* get the program +    */ +   program = brw_get_program(&c->func, &program_size); + +   /* +    */ +   brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], +					      &c->key, +					      sizeof(c->key), +					      program, +					      program_size, +					      &c->prog_data, +					      &brw->wm.prog_data ); + +   FREE(c); +} + + + +static void brw_wm_populate_key( struct brw_context *brw, +				 struct brw_wm_prog_key *key ) +{ +   /* BRW_NEW_FRAGMENT_PROGRAM */ +   struct brw_fragment_program *fp = +      (struct brw_fragment_program *)brw->attribs.FragmentProgram; +   unsigned lookup = 0; +   unsigned line_aa; +    +   memset(key, 0, sizeof(*key)); + +   /* Build the index for table lookup +    */ +   /* BRW_NEW_DEPTH_STENCIL */ +   if (fp->info.uses_kill || +       brw->attribs.DepthStencil->alpha.enabled) +      lookup |= IZ_PS_KILL_ALPHATEST_BIT; + +   if (fp->info.writes_z) +      lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + +   if (brw->attribs.DepthStencil->depth.enabled) +      lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + +   if (brw->attribs.DepthStencil->depth.enabled && +       brw->attribs.DepthStencil->depth.writemask) /* ?? */ +      lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + +   if (brw->attribs.DepthStencil->stencil[0].enabled) { +      lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + +      if (brw->attribs.DepthStencil->stencil[0].writemask || +	  brw->attribs.DepthStencil->stencil[1].writemask) +	 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; +   } + +   /* XXX: when should this be disabled? 
+    */ +   if (1) +      lookup |= IZ_EARLY_DEPTH_TEST_BIT; + + +   line_aa = AA_NEVER; + +   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ +   if (brw->attribs.Raster->line_smooth) { +      if (brw->reduced_primitive == PIPE_PRIM_LINES) { +	 line_aa = AA_ALWAYS; +      } +      else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { +	 if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) { +	    line_aa = AA_SOMETIMES; + +	    if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE || +		(brw->attribs.Raster->cull_mode == PIPE_WINDING_CW)) +	       line_aa = AA_ALWAYS; +	 } +	 else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) { +	    line_aa = AA_SOMETIMES; + +	    if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW) +	       line_aa = AA_ALWAYS; +	 } +      } +   } + +   brw_wm_lookup_iz(line_aa, +		    lookup, +		    key); + + +#if 0 +   /* BRW_NEW_SAMPLER  +    * +    * Not doing any of this at the moment: +    */ +   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { +      const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; + +      if (unit) { + +	 if (unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +	    key->shadowtex_mask |= 1<<i; +	 } +	 if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) +	    key->yuvtex_mask |= 1<<i; +      } +   } +#endif + + +   /* Extra info: +    */ +   key->program_string_id = fp->id; + +} + + +static void brw_upload_wm_prog( struct brw_context *brw ) +{ +   struct brw_wm_prog_key key; +   struct brw_fragment_program *fp = (struct brw_fragment_program *) +      brw->attribs.FragmentProgram; + +   brw_wm_populate_key(brw, &key); + +   /* Make an early check for the key. +    */ +   if (brw_search_cache(&brw->cache[BRW_WM_PROG], +			&key, sizeof(key), +			&brw->wm.prog_data, +			&brw->wm.prog_gs_offset)) +      return; + +   do_wm_prog(brw, fp, &key); +} + + +const struct brw_tracked_state brw_wm_prog = { +   .dirty = { +      .brw   = (BRW_NEW_FS | +		BRW_NEW_REDUCED_PRIMITIVE), +      .cache = 0 +   }, +   .update = brw_upload_wm_prog +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h new file mode 100644 index 0000000000..b29c4393f0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.h @@ -0,0 +1,142 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. +  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ +               + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "brw_context.h" +#include "brw_eu.h" + +/* A big lookup table is used to figure out which and how many + * additional regs will inserted before the main payload in the WM + * program execution.  These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT    0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT    0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT   0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT    0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT  0x20 +#define IZ_EARLY_DEPTH_TEST_BIT     0x40 +#define IZ_BIT_MAX                  0x80 + +#define AA_NEVER     0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS    2 + +struct brw_wm_prog_key { +   unsigned source_depth_reg:3; +   unsigned aa_dest_stencil_reg:3; +   unsigned dest_depth_reg:3; +   unsigned nr_depth_regs:3; +   unsigned shadowtex_mask:8; +   unsigned computes_depth:1;	/* could be derived from program string */ +   unsigned source_depth_to_render_target:1; +   unsigned runtime_check_aads_emit:1; + +   unsigned yuvtex_mask:8; + +   unsigned program_string_id; +}; + + + + + +#define PROGRAM_INTERNAL_PARAM +#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ +#define BRW_WM_MAX_INSN  (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3) +#define BRW_WM_MAX_GRF   128		/* hardware limit */ +#define BRW_WM_MAX_VREG  (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF   (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS + +#define PAYLOAD_DEPTH     (PIPE_MAX_ATTRIBS) + +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + +struct brw_wm_compile { +   struct brw_compile func; +   struct brw_wm_prog_key key; +   struct brw_wm_prog_data prog_data; /* result */ + +   struct brw_fragment_program *fp; + +   unsigned grf_limit; +   unsigned max_wm_grf; + + +   struct brw_reg pixel_xy[2]; +   struct brw_reg delta_xy[2]; +   struct brw_reg pixel_w; + + +   struct brw_reg wm_regs[8][32][4]; + +   struct brw_reg payload_depth[4]; +   struct brw_reg payload_coef[16]; + +   struct brw_reg emit_mask_reg; + +   struct brw_instruction *if_inst[MAX_IFSN]; +   int if_insn; + +   struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; +   int loop_insn; + +   struct brw_instruction *inst0; +   struct brw_instruction *inst1; + +   struct brw_reg stack; +   struct brw_indirect stack_index; + +   unsigned reg_index; + +   unsigned tmp_start; +   unsigned tmp_index; +}; + + + +void brw_wm_lookup_iz( unsigned line_aa, +		       unsigned lookup, +		       struct brw_wm_prog_key *key ); + +void brw_wm_glsl_emit(struct brw_wm_compile *c); +void brw_wm_emit_decls(struct brw_wm_compile *c); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c new file mode 100644 index 0000000000..d50e66f613 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -0,0 +1,392 @@ + +#include "brw_context.h" +#include "brw_eu.h" 
+#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ +   c->tmp_index++; +   c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); +   return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ +   c->tmp_index = 0; +} + + + +static int is_null( struct brw_reg reg ) +{ +   return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && +	   reg.nr == BRW_ARF_NULL); +} + +static void emit_pixel_xy( struct brw_wm_compile *c ) +{ +   if (is_null(c->pixel_xy[0])) { + +      struct brw_compile *p = &c->func; +      struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + +      c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); +      c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + +      /* Calculate pixel centers by adding 1 or 0 to each of the +       * micro-tile coordinates passed in r1. +       */ +      brw_ADD(p, +	      c->pixel_xy[0], +	      stride(suboffset(r1_uw, 4), 2, 4, 0), +	      brw_imm_v(0x10101010)); + +      brw_ADD(p, +	      c->pixel_xy[1], +	      stride(suboffset(r1_uw, 5), 2, 4, 0), +	      brw_imm_v(0x11001100)); +   } +} + + + + + + +static void emit_delta_xy( struct brw_wm_compile *c ) +{ +   if (is_null(c->delta_xy[0])) { +      struct brw_compile *p = &c->func; +      struct brw_reg r1 = brw_vec1_grf(1, 0); + +      emit_pixel_xy(c); + +      c->delta_xy[0] = alloc_tmp(c); +      c->delta_xy[1] = alloc_tmp(c); + +      /* Calc delta X,Y by subtracting origin in r1 from the pixel +       * centers. +       */ +      brw_ADD(p, +	      c->delta_xy[0], +	      retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), +	      negate(r1)); + +      brw_ADD(p, +	      c->delta_xy[1], +	      retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), +	      negate(suboffset(r1,1))); +   } +} + + + +#if 0 +static void emit_pixel_w( struct brw_wm_compile *c ) +{ +   if (is_null(c->pixel_w)) { +      struct brw_compile *p = &c->func; + +      struct brw_reg interp_wpos = c->coef_wpos; +       +      c->pixel_w = alloc_tmp(c); + +      emit_delta_xy(c); + +      /* Calc 1/w - just linterp wpos[3] optimized by putting the +       * result straight into a message reg. 
+       */ +      struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); +      brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); +      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); + +      /* Calc w */ +      brw_math_16( p,  +		   c->pixel_w, +		   BRW_MATH_FUNCTION_INV, +		   BRW_MATH_SATURATE_NONE, +		   2,  +		   brw_null_reg(), +		   BRW_MATH_PRECISION_FULL); +   } +} +#endif + + +static void emit_cinterp(struct brw_wm_compile *c, +			 int idx, +			 int mask ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg interp[4]; +   struct brw_reg coef = c->payload_coef[idx]; +   int i; + +   interp[0] = brw_vec1_grf(coef.nr, 0); +   interp[1] = brw_vec1_grf(coef.nr, 4); +   interp[2] = brw_vec1_grf(coef.nr+1, 0); +   interp[3] = brw_vec1_grf(coef.nr+1, 4); + +   for(i = 0; i < 4; i++ ) { +      if (mask & (1<<i)) { +	 struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; +	 brw_MOV(p, dst, suboffset(interp[i],3)); +      } +   } +} + +static void emit_linterp(struct brw_wm_compile *c, +			 int idx, +			 int mask ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg interp[4]; +   struct brw_reg coef = c->payload_coef[idx]; +   int i; + +   emit_delta_xy(c); + +   interp[0] = brw_vec1_grf(coef.nr, 0); +   interp[1] = brw_vec1_grf(coef.nr, 4); +   interp[2] = brw_vec1_grf(coef.nr+1, 0); +   interp[3] = brw_vec1_grf(coef.nr+1, 4); + +   for(i = 0; i < 4; i++ ) { +      if (mask & (1<<i)) { +	 struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; +	 brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); +	 brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); +      } +   } +} + +#if 0 +static void emit_pinterp(struct brw_wm_compile *c, +			 int idx, +			 int mask ) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg interp[4]; +   struct brw_reg coef = c->payload_coef[idx]; +   int i; + +   get_delta_xy(c); +   get_pixel_w(c); + +   interp[0] = brw_vec1_grf(coef.nr, 0); +   interp[1] = brw_vec1_grf(coef.nr, 4); +   interp[2] = brw_vec1_grf(coef.nr+1, 0); +   interp[3] = brw_vec1_grf(coef.nr+1, 4); + +   for(i = 0; i < 4; i++ ) { +      if (mask & (1<<i)) { +	 struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i); +	 brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); +	 brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); +	 brw_MUL(p, dst, dst, c->pixel_w); +      } +   } +} +#endif + + + +#if 0 +static void emit_wpos( ) +{  +   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); +   struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); +   struct tgsi_full_src_register deltas = get_delta_xy(c); +   struct tgsi_full_src_register arg2; +   unsigned opcode; + +   opcode = WM_LINTERP; +   arg2 = src_undef(); + +   /* Have to treat wpos.xy specially: +    */ +   emit_op(c, +	   WM_WPOSXY, +	   dst_mask(dst, WRITEMASK_XY), +	   0, 0, 0, +	   get_pixel_xy(c), +	   src_undef(), +	   src_undef()); +       +   dst = dst_mask(dst, WRITEMASK_ZW); + +   /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw +    */ +   emit_op(c, +	   WM_LINTERP, +	   dst, +	   0, 0, 0, +	   interp, +	   deltas, +	   arg2); +} +#endif + + + + +/* Perform register allocation: + *  + *  -- r0??? + *  -- passthrough depth regs (and stencil/aa??) + *  -- curbe ?? + *  -- inputs (coefficients) + * + * Use a totally static register allocation.  This will perform poorly + * but is an easy way to get started (again). 
+ */ +static void prealloc_reg(struct brw_wm_compile *c) +{ +   int i, j; +   int nr_curbe_regs = 0; + +   /* R0, then some depth related regs: +    */ +   for (i = 0; i < c->key.nr_depth_regs; i++) { +      c->payload_depth[i] =  brw_vec8_grf(i*2, 0); +      c->reg_index += 2; +   } + + +   /* Then a copy of our part of the CURBE entry: +    */ +   { +      int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; +      int index = 0; + +      /* XXX number of constants, or highest numbered constant? */ +      assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); + +      c->prog_data.max_const = 4*nr_constants; +      for (i = 0; i < nr_constants; i++) { +	 for (j = 0; j < 4; j++, index++)  +	    c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, +								index%8); +      } + +      nr_curbe_regs = 2*((4*nr_constants+15)/16); +      c->reg_index += nr_curbe_regs; +   } + +   /* Adjust for parameter coefficients for position, which are +    * currently always provided. +    */ +//   c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); +   c->reg_index += 2; + +   /* Next we receive the plane coefficients for parameter +    * interpolation: +    */ +   assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); +   for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { +      c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); +      c->reg_index += 2; +   } + +   c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; +   c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; +   c->prog_data.curb_read_length = nr_curbe_regs; + +   /* That's the end of the payload, now we can start allocating registers. +    */ +   c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); +   c->reg_index++; + +   c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); +   c->reg_index += 2; + +   /* Now allocate room for the interpolated inputs and staging +    * registers for the outputs: +    */ +   /* XXX do we want to loop over the _number_ of inputs/outputs or loop +    * to the highest input/output index that's used? +    *  Probably the same, actually. +    */ +   assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); +   assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); +   for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++)  +      for (j = 0; j < 4; j++) +	 c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + +   for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++)  +      for (j = 0; j < 4; j++) +	 c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + +   /* Beyond this we should only need registers for internal temporaries: +    */ +   c->tmp_start = c->reg_index; +} + + + + + +/* Need to interpolate fragment program inputs in as a preamble to the + * shader.  
A more sophisticated compiler would do this on demand, but + * we'll do it up front: + */ +void brw_wm_emit_decls(struct brw_wm_compile *c) +{ +   struct tgsi_parse_context parse; +   int done = 0; + +   prealloc_reg(c); + +   tgsi_parse_init( &parse, c->fp->program.tokens ); + +   while( !done && +	  !tgsi_parse_end_of_tokens( &parse ) )  +   { +      tgsi_parse_token( &parse ); + +      switch( parse.FullToken.Token.Type ) { +      case TGSI_TOKEN_TYPE_DECLARATION: +      { +	 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; +	 unsigned first = decl->DeclarationRange.First; +	 unsigned last = decl->DeclarationRange.Last; +	 unsigned mask = decl->Declaration.UsageMask; /* ? */ +	 unsigned i; + +	 if (decl->Declaration.File != TGSI_FILE_INPUT) +	    break; + +	 for( i = first; i <= last; i++ ) { +	    switch (decl->Declaration.Interpolate) { +	    case TGSI_INTERPOLATE_CONSTANT: +	       emit_cinterp(c, i, mask); +	       break; + +	    case TGSI_INTERPOLATE_LINEAR: +	       emit_linterp(c, i, mask); +	       break; + +	    case TGSI_INTERPOLATE_PERSPECTIVE: +	       //emit_pinterp(c, i, mask); +	       emit_linterp(c, i, mask); +	       break; +	    } +	 } +	 break; +      } +      case TGSI_TOKEN_TYPE_IMMEDIATE: +      case TGSI_TOKEN_TYPE_INSTRUCTION: +      default: +         done = 1; +	 break; +      } +   } + +   tgsi_parse_free (&parse); +    +   release_tmps(c); +} diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c new file mode 100644 index 0000000000..ab6410aa60 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -0,0 +1,1076 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + + + +static int get_scalar_dst_index(struct tgsi_full_instruction *inst) +{ +   struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; +   int i; +   for (i = 0; i < 4; i++) +      if (dst.WriteMask & (1<<i)) +	 break; +   return i; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ +   c->tmp_index++; +   c->reg_index = MAX2(c->reg_index, c->tmp_index); +   return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ +   c->tmp_index = 0; +} + + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component ) +{ +   switch (file) { +   case TGSI_FILE_NULL: +      return brw_null_reg(); + +   case TGSI_FILE_SAMPLER: +      /* Should never get here: +       */ +      assert (0);	        +      return brw_null_reg(); + +   case TGSI_FILE_IMMEDIATE: +      /* These need a different path: +       */ +      assert(0); +      return brw_null_reg(); + +        +   case TGSI_FILE_CONSTANT: +   case TGSI_FILE_INPUT: +   case TGSI_FILE_OUTPUT: +   case TGSI_FILE_TEMPORARY: +   case TGSI_FILE_ADDRESS: +      return c->wm_regs[file][index][component]; + +   default: +      assert(0); +      return brw_null_reg(); +   } +} + + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, +				  struct tgsi_full_instruction *inst,  +				  int component) +{ +   return get_reg(c,  +		  inst->FullDstRegisters[0].DstRegister.File,  +		  inst->FullDstRegisters[0].DstRegister.Index, +		  component); +} + +static int get_swz( struct tgsi_src_register src, int index ) +{ +   switch (index & 3) { +   case 0: return src.SwizzleX; +   case 1: return src.SwizzleY; +   case 2: return 
src.SwizzleZ; +   case 3: return src.SwizzleW; +   default: return 0; +   } +} + +static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) +{ +   switch (index & 3) { +   case 0: return src.ExtSwizzleX; +   case 1: return src.ExtSwizzleY; +   case 2: return src.ExtSwizzleZ; +   case 3: return src.ExtSwizzleW; +   default: return 0; +   } +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, +				  struct tgsi_full_src_register *src,  +				  int index) +{ +   struct brw_reg reg; +   int component = index; +   int neg = 0; +   int abs = 0; + +   if (src->SrcRegister.Negate) +      neg = 1; + +   component = get_swz(src->SrcRegister, component); + +   /* Yes, there are multiple negates: +    */ +   switch (component & 3) { +   case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; +   case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; +   case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; +   case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; +   } + +   /* And multiple swizzles, fun isn't it: +    */ +   component = get_ext_swz(src->SrcRegisterExtSwz, component); + +   /* Not handling indirect lookups yet: +    */ +   assert(src->SrcRegister.Indirect == 0); + +   /* Don't know what dimension means: +    */ +   assert(src->SrcRegister.Dimension == 0); + +   /* Will never handle any of this stuff:  +    */ +   assert(src->SrcRegisterExtMod.Complement == 0); +   assert(src->SrcRegisterExtMod.Bias == 0); +   assert(src->SrcRegisterExtMod.Scale2X == 0); + +   if (src->SrcRegisterExtMod.Absolute) +      abs = 1; + +   /* Another negate!  This is a post-absolute negate, which we +    * can't do.  Need to clean the crap out of tgsi somehow. +    */ +   assert(src->SrcRegisterExtMod.Negate == 0); + +   switch( component ) { +   case TGSI_EXTSWIZZLE_X: +   case TGSI_EXTSWIZZLE_Y: +   case TGSI_EXTSWIZZLE_Z: +   case TGSI_EXTSWIZZLE_W: +      reg = get_reg(c,  +		    src->SrcRegister.File,  +		    src->SrcRegister.Index,  +		    component ); + +      if (neg)  +	 reg = negate(reg); +    +      if (abs) +	 reg = brw_abs(reg); + +      break; + +      /* XXX: this won't really work in the general case, but we know +       * that the extended swizzle is only allowed in the SWZ +       * instruction (right??), in which case using an immediate +       * directly will work. 
+       */ +   case TGSI_EXTSWIZZLE_ZERO: +      reg = brw_imm_f(0); +      break; + +   case TGSI_EXTSWIZZLE_ONE: +      if (neg && !abs) +	 reg = brw_imm_f(-1.0); +      else +	 reg = brw_imm_f(1.0); +      break; + +   default: +      assert(0); +      break; +   } + +     +   return reg; +} + +static void emit_abs( struct brw_wm_compile *c, +		      struct tgsi_full_instruction *inst) +{ +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + +   int i; +   struct brw_compile *p = &c->func; +   brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); +   for (i = 0; i < 4; i++) { +      if (mask & (1<<i)) { +	 struct brw_reg src, dst; +	 dst = get_dst_reg(c, inst, i); +	 src = get_src_reg(c, &inst->FullSrcRegisters[0], i); +	 brw_MOV(p, dst, brw_abs(src)); /* NOTE */ +      } +   } +   brw_set_saturate(p, 0); +} + + +static void emit_xpd(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   int i; +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   for (i = 0; i < 4; i++) { +      unsigned i2 = (i+2)%3; +      unsigned i1 = (i+1)%3; +      if (mask & (1<<i)) { +	 struct brw_reg src0, src1, dst; +	 dst = get_dst_reg(c, inst, i); +	 src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2)); +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); +	 brw_MUL(p, brw_null_reg(), src0, src1); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); +	 brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); +	 brw_MAC(p, dst, src0, src1); +	 brw_set_saturate(p, 0); +      } +   } +   brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_reg src0[3], src1[3], dst; +   int i; +   struct brw_compile *p = &c->func; +   for (i = 0; i < 3; i++) { +      src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); +      src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); +   } + +   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); +   brw_MUL(p, brw_null_reg(), src0[0], src1[0]); +   brw_MAC(p, brw_null_reg(), src0[1], src1[1]); +   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +   brw_MAC(p, dst, src0[2], src1[2]); +   brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_reg src0[4], src1[4], dst; +   int i; +   struct brw_compile *p = &c->func; +   for (i = 0; i < 4; i++) { +      src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); +      src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); +   } +   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); +   brw_MUL(p, brw_null_reg(), src0[0], src1[0]); +   brw_MAC(p, brw_null_reg(), src0[1], src1[1]); +   brw_MAC(p, brw_null_reg(), src0[2], src1[2]); +   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); +   brw_MAC(p, dst, src0[3], src1[3]); +   brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_reg src0[4], src1[4], dst; +   int i; +   struct brw_compile *p = &c->func; +   for (i = 0; i < 4; i++) { +      src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); +      src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); +   } +   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); +   brw_MUL(p, brw_null_reg(), src0[0], src1[0]); +   brw_MAC(p, brw_null_reg(), src0[1], src1[1]); +   brw_MAC(p, dst, src0[2], src1[2]); +   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +   brw_ADD(p, dst, src0[3], src1[3]); +   brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, +		       struct tgsi_full_instruction *inst, unsigned func) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg src0, dst; + +   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); +   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); +   brw_MOV(p, brw_message_reg(2), src0); +   brw_math(p, +	    dst, +	    func, +	    ((inst->Instruction.Saturate != TGSI_SAT_NONE)  +	     ? BRW_MATH_SATURATE_SATURATE  +	     : BRW_MATH_SATURATE_NONE), +	    2, +	    brw_null_reg(), +	    BRW_MATH_DATA_VECTOR, +	    BRW_MATH_PRECISION_FULL); +} + + +static void emit_alu2(struct brw_wm_compile *c,		       +		      struct tgsi_full_instruction *inst, +		      unsigned opcode) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg src0, src1, dst; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   int i; +   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +   for (i = 0 ; i < 4; i++) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); +	 brw_alu2(p, opcode, dst, src0, src1); +      } +   } +   brw_set_saturate(p, 0); +} + + +static void emit_alu1(struct brw_wm_compile *c, +		      struct tgsi_full_instruction *inst, +		      unsigned opcode) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg src0, dst; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   int i; +   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +   for (i = 0 ; i < 4; i++) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); +	 brw_alu1(p, opcode, dst, src0); +      } +   } +   if (inst->Instruction.Saturate != TGSI_SAT_NONE) +      brw_set_saturate(p, 0); +} + + +static void emit_max(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   struct brw_reg src0, src1, dst; +   int i; +   brw_push_insn_state(p); +   for (i = 0; i < 4; i++) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); +	 brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +	 brw_MOV(p, dst, src0); +	 brw_set_saturate(p, 0); + +	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); +	 brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); +	 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); +	 brw_MOV(p, dst, src1); +	 brw_set_saturate(p, 0); +	 brw_set_predicate_control_flag_value(p, 0xff); +      } +   } +   brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   struct brw_reg src0, src1, dst; +   int i; +   brw_push_insn_state(p); +   for (i = 0; i < 4; i++) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); +	 brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +	 brw_MOV(p, dst, src0); +	 brw_set_saturate(p, 0); + +	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); +	 brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +	 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); +	 brw_MOV(p, dst, src1); +	 brw_set_saturate(p, 0); +	 brw_set_predicate_control_flag_value(p, 0xff); +      } +   } +   brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg dst, src0, src1; +   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); +   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); +   src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); + +   brw_MOV(p, brw_message_reg(2), src0); +   brw_MOV(p, brw_message_reg(3), src1); + +   brw_math(p, +	    dst, +	    BRW_MATH_FUNCTION_POW, +	    (inst->Instruction.Saturate != TGSI_SAT_NONE  +	     ? BRW_MATH_SATURATE_SATURATE  +	     : BRW_MATH_SATURATE_NONE), +	    2, +	    brw_null_reg(), +	    BRW_MATH_DATA_VECTOR, +	    BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   struct brw_reg dst, tmp1, tmp2, src0, src1, src2; +   int i; +   for (i = 0; i < 4; i++) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + +	 if (src1.nr == dst.nr) { +	    tmp1 = alloc_tmp(c); +	    brw_MOV(p, tmp1, src1); +	 } else +	    tmp1 = src1; + +	 src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); +	 if (src2.nr == dst.nr) { +	    tmp2 = alloc_tmp(c); +	    brw_MOV(p, tmp2, src2); +	 } else +	    tmp2 = src2; + +	 brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); +	 brw_MUL(p, brw_null_reg(), dst, tmp2); +	 brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); +	 brw_MAC(p, dst, src0, tmp1); +	 brw_set_saturate(p, 0); +      } +      release_tmps(c); +   } +} + +static void emit_kil(struct brw_wm_compile *c) +{ +   struct brw_compile *p = &c->func; +   struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); +   brw_push_insn_state(p); +   brw_set_mask_control(p, BRW_MASK_DISABLE); +   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK +   brw_AND(p, depth, c->emit_mask_reg, depth); +   brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   struct brw_reg dst, src0, src1, src2; +   int i; + +   for (i = 0; i < 4; i++) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); +	 src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); +	 brw_MUL(p, dst, src0, src1); + +	 brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); +	 brw_ADD(p, dst, dst, src2); +	 brw_set_saturate(p, 0); +      } +   } +} + +static void emit_sop(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst, unsigned cond) +{ +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   struct brw_reg dst, src0, src1; +   int i; + +   brw_push_insn_state(p); +   for (i = 0; i < 4; i++) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); +	 src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); +	 brw_CMP(p, brw_null_reg(), cond, src0, src1); +	 brw_set_predicate_control(p, BRW_PREDICATE_NONE); +	 brw_MOV(p, dst, brw_imm_f(0.0)); +	 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); +	 brw_MOV(p, dst, brw_imm_f(1.0)); +      } +   } +   brw_pop_insn_state(p); +} + + +static void emit_ddx(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   struct brw_reg interp[4]; +   struct brw_reg dst; +   struct brw_reg src0, w; +   unsigned nr, i; +   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); +   w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); +   nr = src0.nr; +   interp[0] = brw_vec1_grf(nr, 0); +   interp[1] = brw_vec1_grf(nr, 4); +   interp[2] = brw_vec1_grf(nr+1, 0); +   interp[3] = brw_vec1_grf(nr+1, 4); +   brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); +   for(i = 0; i < 4; i++ ) { +      if (mask & (1<<i)) { +	 dst = get_dst_reg(c, inst, i); +	 brw_MOV(p, dst, interp[i]); +	 brw_MUL(p, dst, dst, w); +      } +   } +   brw_set_saturate(p, 0); +} + +static void emit_ddy(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +   struct brw_reg interp[4]; +   struct brw_reg dst; +   struct brw_reg src0, w; +   unsigned nr, i; + +   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); +   nr = src0.nr; +   w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); +   interp[0] = brw_vec1_grf(nr, 0); +   interp[1] = brw_vec1_grf(nr, 4); +   interp[2] = brw_vec1_grf(nr+1, 0); +   interp[3] = brw_vec1_grf(nr+1, 4); +   brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); +   for(i = 0; i < 4; i++ ) { +      if (mask & (1<<i)) { +	
 dst = get_dst_reg(c, inst, i); +	 brw_MOV(p, dst, suboffset(interp[i], 1)); +	 brw_MUL(p, dst, dst, w); +      } +   } +   brw_set_saturate(p, 0); +} + +/* TODO +   BIAS on SIMD8 not working yet... +*/ +static void emit_txb(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +#if 0 +   struct brw_compile *p = &c->func; +   struct brw_reg payload_reg = c->payload_depth[0]; +   struct brw_reg dst[4], src[4]; +   unsigned i; +   for (i = 0; i < 4; i++) +      dst[i] = get_dst_reg(c, inst, i); +   for (i = 0; i < 4; i++) +      src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 +   switch (inst->TexSrcTarget) { +   case TEXTURE_1D_INDEX: +      brw_MOV(p, brw_message_reg(2), src[0]); +      brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); +      brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +      break; +   case TEXTURE_2D_INDEX: +   case TEXTURE_RECT_INDEX: +      brw_MOV(p, brw_message_reg(2), src[0]); +      brw_MOV(p, brw_message_reg(3), src[1]); +      brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +      break; +   default: +      brw_MOV(p, brw_message_reg(2), src[0]); +      brw_MOV(p, brw_message_reg(3), src[1]); +      brw_MOV(p, brw_message_reg(4), src[2]); +      break; +   } +#else +   brw_MOV(p, brw_message_reg(2), src[0]); +   brw_MOV(p, brw_message_reg(3), src[1]); +   brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +#endif + +   brw_MOV(p, brw_message_reg(5), src[3]); +   brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); +   brw_SAMPLE(p, +	      retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), +	      1, +	      retype(payload_reg, BRW_REGISTER_TYPE_UW), +	      inst->TexSrcUnit + 1, /* surface */ +	      inst->TexSrcUnit,     /* sampler */ +	      inst->FullDstRegisters[0].DstRegister.WriteMask, +	      BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, +	      4, +	      4, +	      0); +#endif +} + +static void emit_tex(struct brw_wm_compile *c, +		     struct tgsi_full_instruction *inst) +{ +#if 0 +   struct brw_compile *p = &c->func; +   struct brw_reg payload_reg = c->payload_depth[0]; +   struct brw_reg dst[4], src[4]; +   unsigned msg_len; +   unsigned i, nr; +   unsigned emit; +   boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; + +   for (i = 0; i < 4; i++) +      dst[i] = get_dst_reg(c, inst, i); +   for (i = 0; i < 4; i++) +      src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 +   switch (inst->TexSrcTarget) { +   case TEXTURE_1D_INDEX: +      emit = WRITEMASK_X; +      nr = 1; +      break; +   case TEXTURE_2D_INDEX: +   case TEXTURE_RECT_INDEX: +      emit = WRITEMASK_XY; +      nr = 2; +      break; +   default: +      emit = WRITEMASK_XYZ; +      nr = 3; +      break; +   } +#else +   emit = WRITEMASK_XY; +   nr = 2; +#endif + +   msg_len = 1; + +   for (i = 0; i < nr; i++) { +      static const unsigned swz[4] = {0,1,2,2}; +      if (emit & (1<<i)) +	 brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); +      else +	 brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); +      msg_len += 1; +   } + +   if (shadow) { +      brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); +      brw_MOV(p, brw_message_reg(6), src[2]); +   } + +   brw_SAMPLE(p, +	      retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), +	      1, +	      retype(payload_reg, BRW_REGISTER_TYPE_UW), +	      inst->TexSrcUnit + 1, /* surface */ +	      inst->TexSrcUnit,     /* sampler */ +	      inst->FullDstRegisters[0].DstRegister.WriteMask, +	      BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, +	      4, +	      shadow ? 
6 : 4, +	      0); + +   if (shadow) +      brw_MOV(p, dst[3], brw_imm_f(1.0)); +#endif +} + + + + + + + + +static void emit_fb_write(struct brw_wm_compile *c, +			  struct tgsi_full_instruction *inst) +{ +   struct brw_compile *p = &c->func; +   int nr = 2; +   int channel; +   int base_reg = 0; + +   // src0 = output color +   // src1 = payload_depth[0] +   // src2 = output depth +   // dst = ??? + + + +   /* Reserve a space for AA - may not be needed: +    */ +   if (c->key.aa_dest_stencil_reg) +      nr += 1; + +   { +      brw_push_insn_state(p); +      for (channel = 0; channel < 4; channel++) { +	 struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; + +	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */ +	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */ +	 brw_MOV(p, brw_message_reg(nr + channel), src0); +      } +      /* skip over the regs populated above: */ +      nr += 8; +      brw_pop_insn_state(p); +   } +     + +   /* Pass through control information: +    */ +   /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */ +   { +      brw_push_insn_state(p); +      brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ +      brw_MOV(p, +	      brw_message_reg(base_reg + 1), +	      brw_vec8_grf(1, 0)); +      brw_pop_insn_state(p); +   } + +   /* Send framebuffer write message: */ +   brw_fb_WRITE(p, +		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), +		base_reg, +		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), +		0,              /* render surface always 0 */ +		nr, +		0, +		1); + +} + + +static void brw_wm_emit_instruction( struct brw_wm_compile *c, +				     struct tgsi_full_instruction *inst ) +{ +   struct brw_compile *p = &c->func; + +#if 0    +   if (inst->CondUpdate) +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); +   else +      brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#else +   brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#endif + +   switch (inst->Instruction.Opcode) { +   case TGSI_OPCODE_ABS: +      emit_abs(c, inst); +      break; +   case TGSI_OPCODE_ADD: +      emit_alu2(c, inst, BRW_OPCODE_ADD); +      break; +   case TGSI_OPCODE_SUB: +      assert(0); +//      emit_alu2(c, inst, BRW_OPCODE_SUB); +      break; +   case TGSI_OPCODE_FRC: +      emit_alu1(c, inst, BRW_OPCODE_FRC); +      break; +   case TGSI_OPCODE_FLR: +      assert(0); +//      emit_alu1(c, inst, BRW_OPCODE_FLR); +      break; +   case TGSI_OPCODE_LRP: +      emit_lrp(c, inst); +      break; +   case TGSI_OPCODE_INT: +      emit_alu1(c, inst, BRW_OPCODE_RNDD); +      break; +   case TGSI_OPCODE_MOV: +      emit_alu1(c, inst, BRW_OPCODE_MOV); +      break; +   case TGSI_OPCODE_DP3: +      emit_dp3(c, inst); +      break; +   case TGSI_OPCODE_DP4: +      emit_dp4(c, inst); +      break; +   case TGSI_OPCODE_XPD: +      emit_xpd(c, inst); +      break; +   case TGSI_OPCODE_DPH: +      emit_dph(c, inst); +      break; +   case TGSI_OPCODE_RCP: +      emit_math1(c, inst, BRW_MATH_FUNCTION_INV); +      break; +   case TGSI_OPCODE_RSQ: +      emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); +      break; +   case TGSI_OPCODE_SIN: +      emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); +      break; +   case TGSI_OPCODE_COS: +      emit_math1(c, inst, BRW_MATH_FUNCTION_COS); +      break; +   case TGSI_OPCODE_EX2: +      emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); +      break; +   case TGSI_OPCODE_LG2: +      emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); +      break; +   case TGSI_OPCODE_MAX: +      emit_max(c, inst); +      break; +   case 
TGSI_OPCODE_MIN: +      emit_min(c, inst); +      break; +   case TGSI_OPCODE_DDX: +      emit_ddx(c, inst); +      break; +   case TGSI_OPCODE_DDY: +      emit_ddy(c, inst); +      break; +   case TGSI_OPCODE_SLT: +      emit_sop(c, inst, BRW_CONDITIONAL_L); +      break; +   case TGSI_OPCODE_SLE: +      emit_sop(c, inst, BRW_CONDITIONAL_LE); +      break; +   case TGSI_OPCODE_SGT: +      emit_sop(c, inst, BRW_CONDITIONAL_G); +      break; +   case TGSI_OPCODE_SGE: +      emit_sop(c, inst, BRW_CONDITIONAL_GE); +      break; +   case TGSI_OPCODE_SEQ: +      emit_sop(c, inst, BRW_CONDITIONAL_EQ); +      break; +   case TGSI_OPCODE_SNE: +      emit_sop(c, inst, BRW_CONDITIONAL_NEQ); +      break; +   case TGSI_OPCODE_MUL: +      emit_alu2(c, inst, BRW_OPCODE_MUL); +      break; +   case TGSI_OPCODE_POW: +      emit_pow(c, inst); +      break; +   case TGSI_OPCODE_MAD: +      emit_mad(c, inst); +      break; +   case TGSI_OPCODE_TEX: +      emit_tex(c, inst); +      break; +   case TGSI_OPCODE_TXB: +      emit_txb(c, inst); +      break; +   case TGSI_OPCODE_TEXKILL: +      emit_kil(c); +      break; +   case TGSI_OPCODE_IF: +      assert(c->if_insn < MAX_IFSN); +      c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); +      break; +   case TGSI_OPCODE_ELSE: +      c->if_inst[c->if_insn-1]  = brw_ELSE(p, c->if_inst[c->if_insn-1]); +      break; +   case TGSI_OPCODE_ENDIF: +      assert(c->if_insn > 0); +      brw_ENDIF(p, c->if_inst[--c->if_insn]); +      break; +   case TGSI_OPCODE_BGNSUB: +   case TGSI_OPCODE_ENDSUB: +      break; +   case TGSI_OPCODE_CAL: +      brw_push_insn_state(p); +      brw_set_mask_control(p, BRW_MASK_DISABLE); +      brw_set_access_mode(p, BRW_ALIGN_1); +      brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); +      brw_set_access_mode(p, BRW_ALIGN_16); +      brw_ADD(p,  +	      get_addr_reg(c->stack_index), +	      get_addr_reg(c->stack_index), brw_imm_d(4)); +//      orig_inst = inst->Data; +//      orig_inst->Data = &p->store[p->nr_insn]; +      assert(0); +      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); +      brw_pop_insn_state(p); +      break; + +   case TGSI_OPCODE_RET: +#if 0 +      brw_push_insn_state(p); +      brw_set_mask_control(p, BRW_MASK_DISABLE); +      brw_ADD(p,  +	      get_addr_reg(c->stack_index), +	      get_addr_reg(c->stack_index), brw_imm_d(-4)); +      brw_set_access_mode(p, BRW_ALIGN_1); +      brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); +      brw_set_access_mode(p, BRW_ALIGN_16); +      brw_pop_insn_state(p); +#else +      emit_fb_write(c, inst); +#endif + +      break; +   case TGSI_OPCODE_LOOP: +      c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); +      break; +   case TGSI_OPCODE_BRK: +      brw_BREAK(p); +      brw_set_predicate_control(p, BRW_PREDICATE_NONE); +      break; +   case TGSI_OPCODE_CONT: +      brw_CONT(p); +      brw_set_predicate_control(p, BRW_PREDICATE_NONE); +      break; +   case TGSI_OPCODE_ENDLOOP: +      c->loop_insn--; +      c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); +      /* patch all the BREAK instructions from +	 last BEGINLOOP */ +      while (c->inst0 > c->loop_inst[c->loop_insn]) { +	 c->inst0--; +	 if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { +	    c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; +	    c->inst0->bits3.if_else.pop_count = 0; +	 } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { +	    c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; +	    
c->inst0->bits3.if_else.pop_count = 0; +	 } +      } +      break; +   case TGSI_OPCODE_END: +      emit_fb_write(c, inst); +      break; + +   default: +      debug_printf("unsupported IR in fragment shader %d\n", +		   inst->Instruction.Opcode); +   } +#if 0 +   if (inst->CondUpdate) +      brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); +   else +      brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif +} + + + + + + +void brw_wm_glsl_emit(struct brw_wm_compile *c) +{ +   struct tgsi_parse_context parse; +   struct brw_compile *p = &c->func; + +   brw_init_compile(&c->func); +   brw_set_compression_control(p, BRW_COMPRESSION_NONE); + +   c->reg_index = 0; +   c->if_insn = 0; +   c->loop_insn = 0; +   c->stack_index = brw_indirect(0,0); + +   /* Do static register allocation and parameter interpolation: +    */ +   brw_wm_emit_decls( c ); + +   /* Emit the actual program.  All done with very direct translation, +    * hopefully we can improve on this shortly... +    */ +   brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + +   tgsi_parse_init( &parse, c->fp->program.tokens ); + +   while( !tgsi_parse_end_of_tokens( &parse ) )  +   { +      tgsi_parse_token( &parse ); + +      switch( parse.FullToken.Token.Type ) { +      case TGSI_TOKEN_TYPE_DECLARATION: +	 /* already done */ +	 break; + +      case TGSI_TOKEN_TYPE_IMMEDIATE: +         /* not handled yet */ +	 assert(0); +         break; + +      case TGSI_TOKEN_TYPE_INSTRUCTION: +         brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); +         break; + +      default: +         assert( 0 ); +      } +   } + +   tgsi_parse_free (&parse); +    +   /* Fix up call targets: +    */ +#if 0 +   { +      unsigned nr_insns = c->fp->program.Base.NumInstructions; +      unsigned insn, target_insn; +      struct tgsi_full_instruction *inst1, *inst2; +      struct brw_instruction *brw_inst1, *brw_inst2; +      int offset; +      for (insn = 0; insn < nr_insns; insn++) { +	 inst1 = &c->fp->program.Base.Instructions[insn]; +	 brw_inst1 = inst1->Data; +	 switch (inst1->Opcode) { +	 case TGSI_OPCODE_CAL: +	    target_insn = inst1->BranchTarget; +	    inst2 = &c->fp->program.Base.Instructions[target_insn]; +	    brw_inst2 = inst2->Data; +	    offset = brw_inst2 - brw_inst1; +	    brw_set_src1(brw_inst1, brw_imm_d(offset*16)); +	    break; +	 default: +	    break; +	 } +      } +   } +#endif + +   c->prog_data.total_grf = c->reg_index; +   c->prog_data.total_scratch = 0; +} diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c new file mode 100644 index 0000000000..6c5f25bf39 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_iz.c @@ -0,0 +1,214 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. +  + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: +  + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+  + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +  + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_wm.h" + + +#undef P			/* prompted depth */ +#undef C			/* computed */ +#undef N			/* non-promoted? */ + +#define P 0 +#define C 1 +#define N 2 + +const struct { +   unsigned mode:2; +   unsigned sd_present:1; +   unsigned sd_to_rt:1; +   unsigned dd_present:1; +   unsigned ds_present:1; +} wm_iz_table[IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 1, 1, 0, 0 },  + { C, 1, 1, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 1, 1, 1, 0 },  + { C, 1, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { C, 1, 1, 1, 0 },  + { C, 1, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 1, 1, 0, 0 },  + { C, 1, 1, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 0, 1, 0, 0 },  + { C, 1, 1, 1, 0 },  + { C, 1, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { C, 1, 1, 1, 0 },  + { C, 1, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 0, 0, 1 },  + { C, 0, 0, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 1, 1, 0, 1 },  + { C, 1, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 1, 1, 1, 1 },  + { C, 1, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 1, 1, 1, 1 },  + { C, 1, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 0, 0, 1 },  + { C, 0, 0, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 1, 1, 0, 1 },  + { C, 1, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 1, 1, 1, 1 },  + { C, 1, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 1, 1, 1, 1 },  + { C, 1, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { N, 1, 1, 0, 0 },  + { N, 0, 1, 0, 0 },  + { N, 0, 1, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { P, 0, 0, 0, 0 },  + { N, 1, 1, 0, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { N, 1, 1, 0, 0 },  + { N, 0, 1, 0, 0 },  + { N, 0, 1, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { P, 0, 0, 0, 0 },  + { N, 1, 1, 0, 0 },  + { C, 0, 1, 1, 0 },  + { C, 0, 1, 1, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { N, 1, 1, 0, 1 },  + { N, 0, 1, 0, 1 },  + { N, 0, 1, 0, 1 },  + { P, 0, 0, 0, 0 },  + { P, 0, 0, 0, 0 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { P, 0, 0, 0, 0 },  + { N, 1, 1, 0, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { P, 0, 0, 0, 0 },  + { C, 0, 0, 0, 1 },  + { P, 0, 0, 0, 0 },  + { C, 0, 1, 0, 1 },  + { P, 0, 0, 0, 0 },  + 
{ C, 1, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { C, 0, 1, 0, 1 },  + { P, 0, 0, 0, 0 },  + { C, 1, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { P, 0, 0, 0, 0 },  + { C, 1, 1, 1, 1 },  + { C, 0, 1, 1, 1 },  + { C, 0, 1, 1, 1 }  +}; + +void brw_wm_lookup_iz( unsigned line_aa, +		       unsigned lookup, +		       struct brw_wm_prog_key *key ) +{ +   unsigned reg = 2; + +   assert (lookup < IZ_BIT_MAX); +       +   if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) +      key->computes_depth = 1; + +   if (wm_iz_table[lookup].sd_present) { +      key->source_depth_reg = reg; +      reg += 2; +   } + +   if (wm_iz_table[lookup].sd_to_rt) +      key->source_depth_to_render_target = 1; + +   if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { +      key->aa_dest_stencil_reg = reg; +      key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && +				      line_aa == AA_SOMETIMES); +      reg++; +   } + +   if (wm_iz_table[lookup].dd_present) { +      key->dest_depth_reg = reg; +      reg+=2; +   } + +   key->nr_depth_regs = (reg+1)/2; +} + diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c new file mode 100644 index 0000000000..52b2909a65 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -0,0 +1,275 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + + +#define COMPAREFUNC_ALWAYS		0 +#define COMPAREFUNC_NEVER		0x1 +#define COMPAREFUNC_LESS		0x2 +#define COMPAREFUNC_EQUAL		0x3 +#define COMPAREFUNC_LEQUAL		0x4 +#define COMPAREFUNC_GREATER		0x5 +#define COMPAREFUNC_NOTEQUAL		0x6 +#define COMPAREFUNC_GEQUAL		0x7 + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. 
+ */ + +static int intel_translate_shadow_compare_func(unsigned func) +{ +   switch(func) { +   case PIPE_FUNC_NEVER: +       return COMPAREFUNC_ALWAYS; +   case PIPE_FUNC_LESS: +       return COMPAREFUNC_LEQUAL; +   case PIPE_FUNC_LEQUAL: +       return COMPAREFUNC_LESS; +   case PIPE_FUNC_GREATER: +       return COMPAREFUNC_GEQUAL; +   case PIPE_FUNC_GEQUAL: +      return COMPAREFUNC_GREATER; +   case PIPE_FUNC_NOTEQUAL: +      return COMPAREFUNC_EQUAL; +   case PIPE_FUNC_EQUAL: +      return COMPAREFUNC_NOTEQUAL; +   case PIPE_FUNC_ALWAYS: +       return COMPAREFUNC_NEVER; +   } + +   debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); +   return COMPAREFUNC_NEVER; +} + +/* The brw (and related graphics cores) do not support GL_CLAMP.  The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned translate_wrap_mode( int wrap ) +{ +   switch( wrap ) { +   case PIPE_TEX_WRAP_REPEAT: +      return BRW_TEXCOORDMODE_WRAP; +   case PIPE_TEX_WRAP_CLAMP: +      return BRW_TEXCOORDMODE_CLAMP; +   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ +   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +      return BRW_TEXCOORDMODE_CLAMP_BORDER; +   case PIPE_TEX_WRAP_MIRROR_REPEAT: +      return BRW_TEXCOORDMODE_MIRROR; +   default: +      return BRW_TEXCOORDMODE_WRAP; +   } +} + + +static unsigned U_FIXED(float value, unsigned frac_bits) +{ +   value *= (1<<frac_bits); +   return value < 0 ? 0 : value; +} + +static int S_FIXED(float value, unsigned frac_bits) +{ +   return value * (1<<frac_bits); +} + + +static unsigned upload_default_color( struct brw_context *brw, +                                      const float *color ) +{ +   struct brw_sampler_default_color sdc; + +   COPY_4V(sdc.color, color); + +   return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); +} + + +/* + */ +static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler, +				      unsigned sdc_gs_offset, +				      struct brw_sampler_state *sampler) +{ +   memset(sampler, 0, sizeof(*sampler)); + +   switch (pipe_sampler->min_mip_filter) { +   case PIPE_TEX_FILTER_NEAREST: +      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; +      break; +   case PIPE_TEX_FILTER_LINEAR: +      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; +      break; +   case PIPE_TEX_FILTER_ANISO: +      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; +      break; +   default: +      break; +   } + +   switch (pipe_sampler->min_mip_filter) { +   case PIPE_TEX_MIPFILTER_NEAREST: +      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; +      break; +   case PIPE_TEX_MIPFILTER_LINEAR: +      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; +      break; +   case PIPE_TEX_MIPFILTER_NONE: +      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; +      break; +   default: +      break; +   } +   /* Set Anisotropy: +    */ +   switch (pipe_sampler->mag_img_filter) { +   case PIPE_TEX_FILTER_NEAREST: +      sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; +      break; +   case PIPE_TEX_FILTER_LINEAR: +      sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; +      break; +   case PIPE_TEX_FILTER_ANISO: +      sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; +      break; +   default: +      break; +   } + +   if (pipe_sampler->max_anisotropy > 2.0) { +      sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, +                                    BRW_ANISORATIO_16); +   } + +   
sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); +   sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r); +   sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t); + +   /* Fulsim complains if I don't do this.  Hardware doesn't mind: +    */ +#if 0 +   if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { +      sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; +      sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; +      sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; +   } +#endif + +   /* Set shadow function: +    */ +   if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +      /* Shadowing is "enabled" by emitting a particular sampler +       * message (sample_c).  So need to recompile WM program when +       * shadow comparison is enabled on each/any texture unit. +       */ +      sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func); +   } + +   /* Set LOD bias: +    */ +   sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6); + +   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ +   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + +   /* Set BaseMipLevel, MaxLOD, MinLOD: +    * +    * XXX: I don't think that using firstLevel, lastLevel works, +    * because we always setup the surface state as if firstLevel == +    * level zero.  Probably have to subtract firstLevel from each of +    * these: +    */ +   sampler->ss0.base_level = U_FIXED(0, 1); + +   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6); +   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6); + +   sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; +} + + + +/* All samplers must be uploaded in a single contiguous array, which + * complicates various things.  However, this is still too confusing - + * FIXME: simplify all the different new texture state flags. + */ +static void upload_wm_samplers(struct brw_context *brw) +{ +   unsigned unit; +   unsigned sampler_count = 0; + +   /* BRW_NEW_SAMPLER */ +   for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; +        unit++) { +      /* determine unit enable/disable by looking for a bound texture */ +      if (brw->attribs.Texture[unit]) { +         const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; +	 unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color); + +	 brw_update_sampler_state(sampler, +				  sdc_gs_offset, +				  &brw->wm.sampler[unit]); + +	 sampler_count = unit + 1; +      } +   } + +   if (brw->wm.sampler_count != sampler_count) { +      brw->wm.sampler_count = sampler_count; +      brw->state.dirty.cache |= CACHE_NEW_SAMPLER; +   } + +   brw->wm.sampler_gs_offset = 0; + +   if (brw->wm.sampler_count) +      brw->wm.sampler_gs_offset = +	 brw_cache_data_sz(&brw->cache[BRW_SAMPLER], +			   brw->wm.sampler, +			   sizeof(struct brw_sampler_state) * brw->wm.sampler_count); +} + +const struct brw_tracked_state brw_wm_samplers = { +   .dirty = { +      .brw = BRW_NEW_SAMPLER, +      .cache = 0 +   }, +   .update = upload_wm_samplers +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c new file mode 100644 index 0000000000..37a9bf919c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -0,0 +1,195 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ +static void upload_wm_unit(struct brw_context *brw ) +{ +   struct brw_wm_unit_state wm; +   unsigned max_threads; +   unsigned per_thread; + +   if (BRW_DEBUG & DEBUG_SINGLE_THREAD) +      max_threads = 0; +   else +      max_threads = 31; + + +   memset(&wm, 0, sizeof(wm)); + +   /* CACHE_NEW_WM_PROG */ +   wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1; +   wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; +   wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; +   wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; +   wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; + +   wm.wm5.max_threads = max_threads; + +   per_thread = align(brw->wm.prog_data->total_scratch, 1024); +   assert(per_thread <= 12 * 1024); + +#if 0 +   if (brw->wm.prog_data->total_scratch) { +      unsigned total = per_thread * (max_threads + 1); + +      /* Scratch space -- just have to make sure there is sufficient space +       * allocated for the active program and current number of threads. +       */ +      brw->wm.scratch_buffer_size = total; +      if (brw->wm.scratch_buffer && +	  brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) { +	 dri_bo_unreference(brw->wm.scratch_buffer); +	 brw->wm.scratch_buffer = NULL; +      } +      if (!brw->wm.scratch_buffer) { +	 brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr, +					       "wm scratch", +					       brw->wm.scratch_buffer_size, +					       4096, DRM_BO_FLAG_MEM_TT); +      } +   } +   /* XXX: Scratch buffers are not implemented correctly. +    * +    * The scratch offset to be programmed into wm is relative to the general +    * state base address.  However, using dri_bo_alloc/dri_bo_emit_reloc (or +    * the previous bmGenBuffers scheme), we get an offset relative to the +    * start of the framebuffer.  
Even before then, it was broken in other ways, +    * so just fail for now if we hit that path. +    */ +   assert(brw->wm.prog_data->total_scratch == 0); +#endif + +   /* CACHE_NEW_SURFACE */ +   wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + +   /* BRW_NEW_CURBE_OFFSETS */ +   wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + +   wm.thread3.urb_entry_read_offset = 0; +   wm.thread1.depth_coef_urb_read_offset = 1; +   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + +   /* CACHE_NEW_SAMPLER */ +   wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; +   wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + +   /* BRW_NEW_FRAGMENT_PROGRAM */ +   { +      const struct brw_fragment_program *fp = brw->attribs.FragmentProgram; + +      if (fp->UsesDepth) +	 wm.wm5.program_uses_depth = 1; /* as far as we can tell */ + +      if (fp->info.writes_z) +	 wm.wm5.program_computes_depth = 1; + +      /* BRW_NEW_ALPHA_TEST */ +      if (fp->info.uses_kill || +	  brw->attribs.DepthStencil->alpha.enabled) +	 wm.wm5.program_uses_killpixel = 1; + +      wm.wm5.enable_8_pix = 1; +   } + +   wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */ +   wm.wm5.legacy_line_rast = 0; +   wm.wm5.legacy_global_depth_bias = 0; +   wm.wm5.early_depth_test = 1;	        /* never need to disable */ +   wm.wm5.line_aa_region_width = 0; +   wm.wm5.line_endcap_aa_region_width = 1; + +   /* BRW_NEW_RASTERIZER */ +   if (brw->attribs.Raster->poly_stipple_enable) +      wm.wm5.polygon_stipple = 1; + +#if 0 +   if (brw->attribs.Polygon->OffsetFill) { +      wm.wm5.depth_offset = 1; +      /* Something wierd going on with legacy_global_depth_bias, +       * offset_constant, scaling and MRD.  This value passes glean +       * but gives some odd results elsewere (eg. the +       * quad-offset-units test). +       */ +      wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2; + +      /* This is the only value that passes glean: +       */ +      wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor; +   } +#endif + +   if (brw->attribs.Raster->line_stipple_enable) { +      wm.wm5.line_stipple = 1; +   } + +   if (BRW_DEBUG & DEBUG_STATS) +      wm.wm4.stats_enable = 1; + +   brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + +   if (brw->wm.prog_data->total_scratch) { +      /* +      dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, +		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, +		     (per_thread / 1024) - 1, +		     brw->wm.state_gs_offset + +		     ((char *)&wm.thread2 - (char *)&wm), +		     brw->wm.scratch_buffer); +      */ +   } else { +      wm.thread2.scratch_space_base_pointer = 0; +   } +} + +const struct brw_tracked_state brw_wm_unit = { +   .dirty = { +      .brw = (BRW_NEW_RASTERIZER | +	      BRW_NEW_ALPHA_TEST | +	      BRW_NEW_FS | +	      BRW_NEW_CURBE_OFFSETS), + +      .cache = (CACHE_NEW_SURFACE | +		CACHE_NEW_WM_PROG | +		CACHE_NEW_SAMPLER) +   }, +   .update = upload_wm_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c new file mode 100644 index 0000000000..1bab5bfdb3 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -0,0 +1,305 @@ +/* + Copyright (C) Intel Corp.  2006.  All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
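For context, brw_wm_unit above is one of the driver's tracked-state atoms: it declares the dirty bits it depends on plus an update() hook that re-emits the hardware state. A minimal sketch of how such atoms are typically walked before a draw (the atom list and the loop are illustrative, not the driver's actual validation code):

static const struct brw_tracked_state *atoms[] = {
   &brw_wm_samplers,
   &brw_wm_unit,
   // ... further atoms, in dependency order
};

static void brw_validate_state(struct brw_context *brw)
{
   for (unsigned i = 0; i < sizeof(atoms) / sizeof(atoms[0]); i++) {
      const struct brw_tracked_state *atom = atoms[i];

      // Run the atom only if one of the flags it declared is currently dirty.
      if ((atom->dirty.brw & brw->state.dirty.brw) ||
          (atom->dirty.cache & brw->state.dirty.cache))
         atom->update(brw);
   }
}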
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static unsigned translate_tex_target( enum pipe_texture_target target ) +{ +   switch (target) { +   case PIPE_TEXTURE_1D: +      return BRW_SURFACE_1D; + +   case PIPE_TEXTURE_2D: +      return BRW_SURFACE_2D; + +   case PIPE_TEXTURE_3D: +      return BRW_SURFACE_3D; + +   case PIPE_TEXTURE_CUBE: +      return BRW_SURFACE_CUBE; + +   default: +      assert(0); +      return 0; +   } +} + +static unsigned translate_tex_format( enum pipe_format pipe_format ) +{ +   switch( pipe_format ) { +   case PIPE_FORMAT_L8_UNORM: +      return BRW_SURFACEFORMAT_L8_UNORM; + +   case PIPE_FORMAT_I8_UNORM: +      return BRW_SURFACEFORMAT_I8_UNORM; + +   case PIPE_FORMAT_A8_UNORM: +      return BRW_SURFACEFORMAT_A8_UNORM; + +   case PIPE_FORMAT_A8L8_UNORM: +      return BRW_SURFACEFORMAT_L8A8_UNORM; + +   case PIPE_FORMAT_R8G8B8_UNORM: +      assert(0);		/* not supported for sampling */ +      return BRW_SURFACEFORMAT_R8G8B8_UNORM; + +   case PIPE_FORMAT_B8G8R8A8_UNORM: +      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + +   case PIPE_FORMAT_R8G8B8A8_UNORM: +      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + +   case PIPE_FORMAT_R5G6B5_UNORM: +      return BRW_SURFACEFORMAT_B5G6R5_UNORM; + +   case PIPE_FORMAT_A1R5G5B5_UNORM: +      return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + +   case PIPE_FORMAT_A4R4G4B4_UNORM: +      return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + +   case PIPE_FORMAT_YCBCR_REV: +      return BRW_SURFACEFORMAT_YCRCB_NORMAL; + +   case PIPE_FORMAT_YCBCR: +      return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; +#if 0 +   case PIPE_FORMAT_RGB_FXT1: +   case PIPE_FORMAT_RGBA_FXT1: +      return BRW_SURFACEFORMAT_FXT1; +#endif + +   case PIPE_FORMAT_Z16_UNORM: +      return BRW_SURFACEFORMAT_I16_UNORM; +#if 0 +   case PIPE_FORMAT_RGB_DXT1: +       return BRW_SURFACEFORMAT_DXT1_RGB; + +   case PIPE_FORMAT_RGBA_DXT1: +       return BRW_SURFACEFORMAT_BC1_UNORM; + +   case PIPE_FORMAT_RGBA_DXT3: +       return BRW_SURFACEFORMAT_BC2_UNORM; + +   case PIPE_FORMAT_RGBA_DXT5: +       return BRW_SURFACEFORMAT_BC3_UNORM; + +   case PIPE_FORMAT_SRGBA8: +      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; +   case PIPE_FORMAT_SRGB_DXT1: +      return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; +#endif + +   default: +     
 assert(0); +      return 0; +   } +} + +static unsigned brw_buffer_offset(struct brw_context *brw, +                                  struct pipe_buffer *buffer) +{ +   return brw->winsys->get_buffer_offset(brw->winsys, +                                         buffer, +                                         0); +} + +static +void brw_update_texture_surface( struct brw_context *brw, +				 unsigned unit ) +{ +   const struct brw_texture *tObj = brw->attribs.Texture[unit]; +   struct brw_surface_state surf; + +   memset(&surf, 0, sizeof(surf)); + +   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; +   surf.ss0.surface_type = translate_tex_target(tObj->base.target); +   surf.ss0.surface_format = translate_tex_format(tObj->base.format); + +   /* This is ok for all textures with channel width 8bit or less: +    */ +/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + +   /* Updated in emit_reloc */ +   surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); + +   surf.ss2.mip_count = tObj->base.last_level; +   surf.ss2.width = tObj->base.width[0] - 1; +   surf.ss2.height = tObj->base.height[0] - 1; + +   surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; +   surf.ss3.tiled_surface = 0; /* always zero */ +   surf.ss3.pitch = tObj->stride - 1; +   surf.ss3.depth = tObj->base.depth[0] - 1; + +   surf.ss4.min_lod = 0; + +   if (tObj->base.target == PIPE_TEXTURE_CUBE) { +      surf.ss0.cube_pos_x = 1; +      surf.ss0.cube_pos_y = 1; +      surf.ss0.cube_pos_z = 1; +      surf.ss0.cube_neg_x = 1; +      surf.ss0.cube_neg_y = 1; +      surf.ss0.cube_neg_z = 1; +   } + +   brw->wm.bind.surf_ss_offset[unit + 1] = +      brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); +} + + + +#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) ) + + +static void upload_wm_surfaces(struct brw_context *brw ) +{ +   unsigned i; + +   { +      struct brw_surface_state surf; + +      /* BRW_NEW_FRAMEBUFFER +       */ +      struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ +      struct brw_texture *tex = (struct brw_texture *)pipe_surface->texture; + +      memset(&surf, 0, sizeof(surf)); + +      if (pipe_surface != NULL) { +	 if (pipe_surface->block.size == 4) +	    surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; +	 else +	    surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + +	 surf.ss0.surface_type = BRW_SURFACE_2D; + +	 surf.ss1.base_addr = brw_buffer_offset( brw, tex->buffer ); + +	 surf.ss2.width = pipe_surface->width - 1; +	 surf.ss2.height = pipe_surface->height - 1; +	 surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; +	 surf.ss3.tiled_surface = 0; +	 surf.ss3.pitch = pipe_surface->stride - 1; +      } else { +	 surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; +	 surf.ss0.surface_type = BRW_SURFACE_NULL; +      } + +      /* BRW_NEW_BLEND */ +      surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && +			      brw->attribs.Blend->blend_enable); + + +      surf.ss0.writedisable_red =   !(brw->attribs.Blend->colormask & PIPE_MASK_R); +      surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G); +      surf.ss0.writedisable_blue =  !(brw->attribs.Blend->colormask & PIPE_MASK_B); +      surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A); + + + + +      brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + +      brw->wm.nr_surfaces = 1; +   } + + +   /* BRW_NEW_TEXTURE +    */ +   for (i = 0; i < brw->num_textures && i < 
brw->num_samplers; i++) { +      const struct brw_texture *texUnit = brw->attribs.Texture[i]; + +      if (texUnit && +	  texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) { + +	 brw_update_texture_surface(brw, i); + +	 brw->wm.nr_surfaces = i+2; +      } +      else { +	 brw->wm.bind.surf_ss_offset[i+1] = 0; +      } +   } + +   brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], +					    &brw->wm.bind ); +} + + +/* KW: Will find a different way to acheive this, see for example the + * state caches with relocs in the i915 swz driver. + */ +#if 0 +static void emit_reloc_wm_surfaces(struct brw_context *brw) +{ +   int unit; + +   if (brw->state.draw_region != NULL) { +      /* Emit framebuffer relocation */ +      dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), +		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, +		     0, +		     brw->wm.bind.surf_ss_offset[0] + +		     offsetof(struct brw_surface_state, ss1), +		     brw->state.draw_region->buffer); +   } + +   /* Emit relocations for texture buffers */ +   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { +      struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; +      struct gl_texture_object *tObj = texUnit->_Current; +      struct intel_texture_object *intelObj = intel_texture_object(tObj); + +      if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { +	 dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), +			DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, +			0, +			brw->wm.bind.surf_ss_offset[unit + 1] + +			offsetof(struct brw_surface_state, ss1), +			intelObj->mt->region->buffer); +      } +   } +} +#endif + +const struct brw_tracked_state brw_wm_surfaces = { +   .dirty = { +      .brw = (BRW_NEW_FRAMEBUFFER | +	      BRW_NEW_BLEND | +	      BRW_NEW_TEXTURE), +      .cache = 0 +   }, +   .update = upload_wm_surfaces, +}; diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h new file mode 100644 index 0000000000..ff97aaa9af --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h @@ -0,0 +1,196 @@ +#ifndef __NOUVEAU_GLDEFS_H__ +#define __NOUVEAU_GLDEFS_H__ + +static INLINE unsigned +nvgl_blend_func(unsigned factor) +{ +	switch (factor) { +	case PIPE_BLENDFACTOR_ZERO: +		return 0x0000; +	case PIPE_BLENDFACTOR_ONE: +		return 0x0001; +	case PIPE_BLENDFACTOR_SRC_COLOR: +		return 0x0300; +	case PIPE_BLENDFACTOR_INV_SRC_COLOR: +		return 0x0301; +	case PIPE_BLENDFACTOR_SRC_ALPHA: +		return 0x0302; +	case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +		return 0x0303; +	case PIPE_BLENDFACTOR_DST_ALPHA: +		return 0x0304; +	case PIPE_BLENDFACTOR_INV_DST_ALPHA: +		return 0x0305; +	case PIPE_BLENDFACTOR_DST_COLOR: +		return 0x0306; +	case PIPE_BLENDFACTOR_INV_DST_COLOR: +		return 0x0307; +	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +		return 0x0308; +	case PIPE_BLENDFACTOR_CONST_COLOR: +		return 0x8001; +	case PIPE_BLENDFACTOR_INV_CONST_COLOR: +		return 0x8002; +	case PIPE_BLENDFACTOR_CONST_ALPHA: +		return 0x8003; +	case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +		return 0x8004; +	default: +		return 0x0000; +	} +} + +static INLINE unsigned +nvgl_blend_eqn(unsigned func) +{ +	switch (func) { +	case PIPE_BLEND_ADD: +		return 0x8006; +	case PIPE_BLEND_MIN: +		return 0x8007; +	case PIPE_BLEND_MAX: +		return 0x8008; +	case PIPE_BLEND_SUBTRACT: +		return 0x800a; +	case PIPE_BLEND_REVERSE_SUBTRACT: +		return 0x800b; +	default: +		return 0x8006; +	} +} + +static INLINE unsigned +nvgl_logicop_func(unsigned func) +{ +	switch (func) { +	case 
PIPE_LOGICOP_CLEAR: +		return 0x1500; +	case PIPE_LOGICOP_NOR: +		return 0x1508; +	case PIPE_LOGICOP_AND_INVERTED: +		return 0x1504; +	case PIPE_LOGICOP_COPY_INVERTED: +		return 0x150c; +	case PIPE_LOGICOP_AND_REVERSE: +		return 0x1502; +	case PIPE_LOGICOP_INVERT: +		return 0x150a; +	case PIPE_LOGICOP_XOR: +		return 0x1506; +	case PIPE_LOGICOP_NAND: +		return 0x150e; +	case PIPE_LOGICOP_AND: +		return 0x1501; +	case PIPE_LOGICOP_EQUIV: +		return 0x1509; +	case PIPE_LOGICOP_NOOP: +		return 0x1505; +	case PIPE_LOGICOP_OR_INVERTED: +		return 0x150d; +	case PIPE_LOGICOP_COPY: +		return 0x1503; +	case PIPE_LOGICOP_OR_REVERSE: +		return 0x150b; +	case PIPE_LOGICOP_OR: +		return 0x1507; +	case PIPE_LOGICOP_SET: +		return 0x150f; +	default: +		return 0x1505; +	} +} + +static INLINE unsigned +nvgl_comparison_op(unsigned op) +{ +	switch (op) { +	case PIPE_FUNC_NEVER: +		return 0x0200; +	case PIPE_FUNC_LESS: +		return 0x0201; +	case PIPE_FUNC_EQUAL: +		return 0x0202; +	case PIPE_FUNC_LEQUAL: +		return 0x0203; +	case PIPE_FUNC_GREATER: +		return 0x0204; +	case PIPE_FUNC_NOTEQUAL: +		return 0x0205; +	case PIPE_FUNC_GEQUAL: +		return 0x0206; +	case PIPE_FUNC_ALWAYS: +		return 0x0207; +	default: +		return 0x0207; +	} +} + +static INLINE unsigned +nvgl_polygon_mode(unsigned mode) +{ +	switch (mode) { +	case PIPE_POLYGON_MODE_POINT: +		return 0x1b00; +	case PIPE_POLYGON_MODE_LINE: +		return 0x1b01; +	case PIPE_POLYGON_MODE_FILL: +		return 0x1b02; +	default: +		return 0x1b02; +	} +} + +static INLINE unsigned +nvgl_stencil_op(unsigned op) +{ +	switch (op) { +	case PIPE_STENCIL_OP_ZERO: +		return 0x0000; +	case PIPE_STENCIL_OP_INVERT: +		return 0x150a; +	case PIPE_STENCIL_OP_KEEP: +		return 0x1e00; +	case PIPE_STENCIL_OP_REPLACE: +		return 0x1e01; +	case PIPE_STENCIL_OP_INCR: +		return 0x1e02; +	case PIPE_STENCIL_OP_DECR: +		return 0x1e03; +	case PIPE_STENCIL_OP_INCR_WRAP: +		return 0x8507; +	case PIPE_STENCIL_OP_DECR_WRAP: +		return 0x8508; +	default: +		return 0x1e00; +	} +} + +static INLINE unsigned +nvgl_primitive(unsigned prim) { +	switch (prim) { +	case PIPE_PRIM_POINTS: +		return 0x0001; +	case PIPE_PRIM_LINES: +		return 0x0002; +	case PIPE_PRIM_LINE_LOOP: +		return 0x0003; +	case PIPE_PRIM_LINE_STRIP: +		return 0x0004; +	case PIPE_PRIM_TRIANGLES: +		return 0x0005; +	case PIPE_PRIM_TRIANGLE_STRIP: +		return 0x0006; +	case PIPE_PRIM_TRIANGLE_FAN: +		return 0x0007; +	case PIPE_PRIM_QUADS: +		return 0x0008; +	case PIPE_PRIM_QUAD_STRIP: +		return 0x0009; +	case PIPE_PRIM_POLYGON: +		return 0x000a; +	default: +		return 0; +	} +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h new file mode 100644 index 0000000000..54ef1c1291 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_push.h @@ -0,0 +1,82 @@ +#ifndef __NOUVEAU_PUSH_H__ +#define __NOUVEAU_PUSH_H__ + +#include "nouveau/nouveau_winsys.h" + +#ifndef NOUVEAU_PUSH_CONTEXT +#error undefined push context +#endif + +#define OUT_RING(data) do {                                                    \ +	NOUVEAU_PUSH_CONTEXT(pc);                                              \ +	(*pc->nvws->channel->pushbuf->cur++) = (data);                         \ +} while(0) + +#define OUT_RINGp(src,size) do {                                               \ +	NOUVEAU_PUSH_CONTEXT(pc);                                              \ +	memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4);            \ +	pc->nvws->channel->pushbuf->cur += (size);                             \ +} while(0) + +#define 
OUT_RINGf(data) do {                                                   \ +	union { float v; uint32_t u; } c;                                      \ +	c.v = (data);                                                          \ +	OUT_RING(c.u);                                                         \ +} while(0) + +#define BEGIN_RING(obj,mthd,size) do {                                         \ +	NOUVEAU_PUSH_CONTEXT(pc);                                              \ +	if (pc->nvws->channel->pushbuf->remaining < ((size) + 1))              \ +		pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL);            \ +	OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd));             \ +	pc->nvws->channel->pushbuf->remaining -= ((size) + 1);                 \ +} while(0) + +#define BEGIN_RING_NI(obj,mthd,size) do {                                      \ +	BEGIN_RING(obj, (mthd) | 0x40000000, (size));                          \ +} while(0) + +#define FIRE_RING(fence) do {                                                  \ +	NOUVEAU_PUSH_CONTEXT(pc);                                              \ +	pc->nvws->push_flush(pc->nvws, 0, fence);                              \ +} while(0) + +#define OUT_RELOC(bo,data,flags,vor,tor) do {                                  \ +	NOUVEAU_PUSH_CONTEXT(pc);                                              \ +	pc->nvws->push_reloc(pc->nvws, pc->nvws->channel->pushbuf->cur++,      \ +			     (bo), (data), (flags), (vor), (tor));             \ +} while(0) + +/* Raw data + flags depending on FB/TT buffer */ +#define OUT_RELOCd(bo,data,flags,vor,tor) do {                                 \ +	OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor));        \ +} while(0) + +/* FB/TT object handle */ +#define OUT_RELOCo(bo,flags) do {                                              \ +	OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR,                            \ +		  pc->nvws->channel->vram->handle,                             \ +		  pc->nvws->channel->gart->handle);                            \ +} while(0) + +/* Low 32-bits of offset */ +#define OUT_RELOCl(bo,delta,flags) do {                                        \ +	OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0);              \ +} while(0) + +/* High 32-bits of offset */ +#define OUT_RELOCh(bo,delta,flags) do {                                        \ +	OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0);             \ +} while(0) + +/* A reloc which'll recombine into a NV_DMA_METHOD packet header */ +#define OUT_RELOCm(bo, flags, obj, mthd, size) do {                            \ +	NOUVEAU_PUSH_CONTEXT(pc);                                              \ +	if (pc->nvws->channel->pushbuf->remaining < ((size) + 1))              \ +		pc->nvws->push_flush(pc->nvws->channel, ((size) + 1), NULL);   \ +	OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd),      \ +		   (flags), 0, 0);                                             \ +	pc->nvws->channel->pushbuf->remaining -= ((size) + 1);                 \ +} while(0) + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h new file mode 100644 index 0000000000..4ae4ff4940 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h @@ -0,0 +1,159 @@ +#ifndef __NOUVEAU_STATEOBJ_H__ +#define __NOUVEAU_STATEOBJ_H__ + +#include "pipe/p_debug.h" + +struct nouveau_stateobj_reloc { +	struct pipe_buffer *bo; + +	unsigned offset; +	unsigned packet; + +	unsigned data; +	unsigned flags; +	unsigned 
vor; +	unsigned tor; +}; + +struct nouveau_stateobj { +	int refcount; + +	unsigned *push; +	struct nouveau_stateobj_reloc *reloc; + +	unsigned *cur; +	unsigned cur_packet; +	unsigned cur_reloc; +}; + +static INLINE struct nouveau_stateobj * +so_new(unsigned push, unsigned reloc) +{ +	struct nouveau_stateobj *so; + +	so = MALLOC(sizeof(struct nouveau_stateobj)); +	so->refcount = 0; +	so->push = MALLOC(sizeof(unsigned) * push); +	so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc); + +	so->cur = so->push; +	so->cur_reloc = so->cur_packet = 0; + +	return so; +} + +static INLINE void +so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso) +{ +	struct nouveau_stateobj *so = *pso; + +	if (ref) { +		ref->refcount++; +	} + +	if (so && --so->refcount <= 0) { +		free(so->push); +		free(so->reloc); +		free(so); +	} + +	*pso = ref; +} + +static INLINE void +so_data(struct nouveau_stateobj *so, unsigned data) +{ +	(*so->cur++) = (data); +	so->cur_packet += 4; +} + +static INLINE void +so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size) +{ +	so->cur_packet += (4 * size); +	while (size--) +		(*so->cur++) = (*data++); +} + +static INLINE void +so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr, +	  unsigned mthd, unsigned size) +{ +	so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4); +	so_data(so, (gr->subc << 13) | (size << 18) | mthd); +} + +static INLINE void +so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo, +	 unsigned data, unsigned flags, unsigned vor, unsigned tor) +{ +	struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++]; +	 +	r->bo = bo; +	r->offset = so->cur - so->push; +	r->packet = so->cur_packet; +	r->data = data; +	r->flags = flags; +	r->vor = vor; +	r->tor = tor; +	so_data(so, data); +} + +static INLINE void +so_dump(struct nouveau_stateobj *so) +{ +	unsigned i, nr = so->cur - so->push; + +	for (i = 0; i < nr; i++) +		debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]); +} + +static INLINE void +so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) +{ +	struct nouveau_pushbuf *pb = nvws->channel->pushbuf; +	unsigned nr, i; + +	nr = so->cur - so->push; +	if (pb->remaining < nr) +		nvws->push_flush(nvws, nr, NULL); +	pb->remaining -= nr; + +	memcpy(pb->cur, so->push, nr * 4); +	for (i = 0; i < so->cur_reloc; i++) { +		struct nouveau_stateobj_reloc *r = &so->reloc[i]; + +		nvws->push_reloc(nvws, pb->cur + r->offset, r->bo, +				 r->data, r->flags, r->vor, r->tor); +	} +	pb->cur += nr; +} + +static INLINE void +so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) +{ +	struct nouveau_pushbuf *pb = nvws->channel->pushbuf; +	unsigned i; + +	if (!so) +		return; + +	i = so->cur_reloc << 1; +	if (nvws->channel->pushbuf->remaining < i) +		nvws->push_flush(nvws, i, NULL); +	nvws->channel->pushbuf->remaining -= i; + +	for (i = 0; i < so->cur_reloc; i++) { +		struct nouveau_stateobj_reloc *r = &so->reloc[i]; + +		nvws->push_reloc(nvws, pb->cur++, r->bo, r->packet, +				 (r->flags & (NOUVEAU_BO_VRAM | +					      NOUVEAU_BO_GART | +					      NOUVEAU_BO_RDWR)) | +				 NOUVEAU_BO_DUMMY, 0, 0); +		nvws->push_reloc(nvws, pb->cur++, r->bo, r->data, +				 r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor); +	} +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h new file mode 100644 index 0000000000..a10114beab --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -0,0 +1,91 @@ +#ifndef __NOUVEAU_UTIL_H__ +#define 
__NOUVEAU_UTIL_H__ + +/* Determine how many vertices can be pushed into the command stream. + * Where the remaining space isn't large enough to represent all verices, + * split the buffer at primitive boundaries. + * + * Returns a count of vertices that can be rendered, and an index to + * restart drawing at after a flush. + */ +static INLINE unsigned +nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp, +		   unsigned mode, unsigned start, unsigned count, +		   unsigned *restart) +{ +	int max, adj = 0; + +	max  = remaining - overhead; +	if (max < 0) +		return 0; + +	max *= vpp; +	if (max >= count) +		return count; + +	switch (mode) { +	case PIPE_PRIM_POINTS: +		break; +	case PIPE_PRIM_LINES: +		max = max & 1; +		break; +	case PIPE_PRIM_TRIANGLES: +		max = max - (max % 3); +		break; +	case PIPE_PRIM_QUADS: +		max = max & 3; +		break; +	case PIPE_PRIM_LINE_LOOP: +	case PIPE_PRIM_LINE_STRIP: +		if (max < 2) +			max = 0; +		adj = 1; +		break; +	case PIPE_PRIM_POLYGON: +	case PIPE_PRIM_TRIANGLE_STRIP: +	case PIPE_PRIM_TRIANGLE_FAN: +		if (max < 3) +			max = 0; +		adj = 2; +		break; +	case PIPE_PRIM_QUAD_STRIP: +		if (max < 4) +			max = 0; +		adj = 3; +		break; +	default: +		assert(0); +	} + +	*restart = start + max - adj; +	return max; +} + +/* Integer base-2 logarithm, rounded towards zero. */ +static INLINE unsigned log2i(unsigned i) +{ +	unsigned r = 0; + +	if (i & 0xffff0000) { +		i >>= 16; +		r += 16; +	} +	if (i & 0x0000ff00) { +		i >>= 8; +		r += 8; +	} +	if (i & 0x000000f0) { +		i >>= 4; +		r += 4; +	} +	if (i & 0x0000000c) { +		i >>= 2; +		r += 2; +	} +	if (i & 0x00000002) { +		r += 1; +	} +	return r; +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h new file mode 100644 index 0000000000..b86c4b9338 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -0,0 +1,100 @@ +#ifndef NOUVEAU_WINSYS_H +#define NOUVEAU_WINSYS_H + +#include <stdint.h> +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_defines.h" + +#include "nouveau/nouveau_bo.h" +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_class.h" +#include "nouveau/nouveau_device.h" +#include "nouveau/nouveau_grobj.h" +#include "nouveau/nouveau_notifier.h" +#include "nouveau/nouveau_resource.h" +#include "nouveau/nouveau_pushbuf.h" + +#define NOUVEAU_CAP_HW_VTXBUF (0xbeef0000) +#define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001) + +#define NOUVEAU_TEXTURE_USAGE_LINEAR (1 << 16) + +#define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16) +#define NOUVEAU_BUFFER_USAGE_ZETA    (1 << 17) + +struct nouveau_winsys { +	struct nouveau_context *nv; + +	struct nouveau_channel *channel; + +	int  (*res_init)(struct nouveau_resource **heap, unsigned start, +			 unsigned size); +	int  (*res_alloc)(struct nouveau_resource *heap, int size, void *priv, +			  struct nouveau_resource **); +	void (*res_free)(struct nouveau_resource **); + +	int  (*push_reloc)(struct nouveau_winsys *, void *ptr, +			   struct pipe_buffer *, uint32_t data, +			   uint32_t flags, uint32_t vor, uint32_t tor); +	int  (*push_flush)(struct nouveau_winsys *, unsigned size, +			   struct pipe_fence_handle **fence); +			        +	int       (*grobj_alloc)(struct nouveau_winsys *, int grclass, +				 struct nouveau_grobj **); +	void      (*grobj_free)(struct nouveau_grobj **); + +	int       (*notifier_alloc)(struct nouveau_winsys *, int count, +				    struct nouveau_notifier **); +	void      (*notifier_free)(struct nouveau_notifier **); +	void      (*notifier_reset)(struct 
nouveau_notifier *, int id); +	uint32_t  (*notifier_status)(struct nouveau_notifier *, int id); +	uint32_t  (*notifier_retval)(struct nouveau_notifier *, int id); +	int       (*notifier_wait)(struct nouveau_notifier *, int id, +				   int status, double timeout); + +	int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *, +			    unsigned, unsigned, struct pipe_surface *, +			    unsigned, unsigned, unsigned, unsigned); +	int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *, +			    unsigned, unsigned, unsigned, unsigned, unsigned); + +	struct nouveau_bo *(*get_bo)(struct pipe_buffer *); +}; + +extern struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv04_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv10_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv20_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv30_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv40_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv50_create(struct pipe_screen *, unsigned pctx_id); + +#endif diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile new file mode 100644 index 0000000000..4ed62dae95 --- /dev/null +++ b/src/gallium/drivers/nv04/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. 
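A usage sketch for the nouveau_stateobj helpers declared in nouveau_stateobj.h above: state is staged with so_method()/so_data() and later copied into the channel's pushbuf by so_emit(). The graphics object and method offset below are placeholders, not real hardware methods:

static void emit_example_state(struct nouveau_winsys *nvws,
                               struct nouveau_grobj *grobj)
{
	/* Room for one method header plus three data words, no relocations. */
	struct nouveau_stateobj *so = so_new(4, 0);

	so_method(so, grobj, 0x0200 /* placeholder method */, 3);
	so_data(so, 0x0);
	so_data(so, 0x1);
	so_data(so, 0x2);

	so_emit(nvws, so);   /* copy the words into the pushbuf, applying relocs */
	so_ref(NULL, &so);   /* drop our reference; frees the object */
}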
+include $(TOP)/configs/current + +LIBNAME = nv04 + +DRIVER_SOURCES = \ +	nv04_surface_2d.c \ +	nv04_clear.c \ +	nv04_context.c \ +	nv04_fragprog.c \ +	nv04_fragtex.c \ +	nv04_miptree.c \ +	nv04_prim_vbuf.c \ +	nv04_screen.c \ +	nv04_state.c \ +	nv04_state_emit.c \ +	nv04_surface.c \ +	nv04_vbo.c + +C_SOURCES = \ +	$(COMMON_SOURCES) \ +	$(DRIVER_SOURCES) + +ASM_SOURCES =  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c new file mode 100644 index 0000000000..01cacd36fe --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv04_context.h" + +void +nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +	pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c new file mode 100644 index 0000000000..d6710cd892 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -0,0 +1,107 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_flush(struct pipe_context *pipe, unsigned flags, +	   struct pipe_fence_handle **fence) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	draw_flush(nv04->draw); + +	FIRE_RING(fence); +} + +static void +nv04_destroy(struct pipe_context *pipe) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	if (nv04->draw) +		draw_destroy(nv04->draw); + +	FREE(nv04); +} + +static void +nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +static boolean +nv04_init_hwctx(struct nv04_context *nv04) +{ +	// requires a valid handle +//	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); +//	OUT_RING(0); +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); +	OUT_RING(0); + +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); +	OUT_RING(0x40182800); +//	OUT_RING(1<<20/*no cull*/); +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); +//	OUT_RING(0x24|(1<<6)|(1<<8)); +	OUT_RING(0x120001a4); +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); +	OUT_RING(0x332213a1); +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); +	OUT_RING(0x11001010); +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); +	OUT_RING(0x0); +//	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); +//	OUT_RING(SCREEN_OFFSET); +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); +	OUT_RING(0xff000000); + + + +	FIRE_RING (NULL); +	return TRUE; +} + +struct pipe_context * +nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ +	struct nv04_screen *screen = nv04_screen(pscreen); +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv04_context *nv04; +	struct nouveau_winsys *nvws = screen->nvws; + +	nv04 = CALLOC(1, sizeof(struct nv04_context)); +	if (!nv04) +		return NULL; +	nv04->screen = screen; +	nv04->pctx_id = pctx_id; + +	nv04->nvws = nvws; + +	nv04->pipe.winsys = ws; +	nv04->pipe.screen = pscreen; +	nv04->pipe.destroy = nv04_destroy; +	nv04->pipe.set_edgeflags = nv04_set_edgeflags; +	nv04->pipe.draw_arrays = nv04_draw_arrays; +	nv04->pipe.draw_elements = nv04_draw_elements; +	nv04->pipe.clear = nv04_clear; +	nv04->pipe.flush = nv04_flush; + +	nv04_init_surface_functions(nv04); +	
nv04_init_state_functions(nv04); + +	nv04->draw = draw_create(); +	assert(nv04->draw); +	draw_wide_point_threshold(nv04->draw, 0.0); +	draw_wide_line_threshold(nv04->draw, 0.0); +	draw_enable_line_stipple(nv04->draw, FALSE); +	draw_enable_point_sprites(nv04->draw, FALSE); +	draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04)); + +	nv04_init_hwctx(nv04); + +	return &nv04->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h new file mode 100644 index 0000000000..2842b2c90d --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -0,0 +1,151 @@ +#ifndef __NV04_CONTEXT_H__ +#define __NV04_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \ +	struct nv04_screen *ctx = nv04->screen +#include "nouveau/nouveau_push.h" + +#include "nv04_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ +	fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ +	fprintf(stderr, "nouveau: "fmt, ##args); + +#include "nv04_screen.h" + +#define NV04_NEW_VERTPROG	(1 << 1) +#define NV04_NEW_FRAGPROG	(1 << 2) +#define NV04_NEW_BLEND		(1 << 3) +#define NV04_NEW_RAST		(1 << 4) +#define NV04_NEW_CONTROL	(1 << 5) +#define NV04_NEW_VIEWPORT	(1 << 6) +#define NV04_NEW_SAMPLER	(1 << 7) +#define NV04_NEW_FRAMEBUFFER	(1 << 8) +#define NV04_NEW_VTXARRAYS	(1 << 9) + +struct nv04_context { +	struct pipe_context pipe; + +	struct nouveau_winsys *nvws; +	struct nv04_screen *screen; +	unsigned pctx_id; + +	struct draw_context *draw; + +	int chipset; +	struct nouveau_notifier *sync; + +	uint32_t dirty; + +	struct nv04_blend_state *blend; +	struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS]; +	struct nv04_fragtex_state fragtex; +	struct nv04_rasterizer_state *rast; +	struct nv04_depth_stencil_alpha_state *dsa; + +	struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; +	unsigned dirty_samplers; +	unsigned fp_samplers; +	unsigned vp_samplers; + +	uint32_t rt_enable; +	struct pipe_framebuffer_state *framebuffer; +	struct pipe_surface *rt; +	struct pipe_surface *zeta; + +	struct { +		struct pipe_buffer *buffer; +		uint32_t format; +	} tex[16]; + +	unsigned vb_enable; +	struct { +		struct pipe_buffer *buffer; +		unsigned delta; +	} vb[16]; + +	float *constbuf[PIPE_SHADER_TYPES][32][4]; +	unsigned constbuf_nr[PIPE_SHADER_TYPES]; + +	struct vertex_info vertex_info; +	struct { +	 +		struct nouveau_resource *exec_heap; +		struct nouveau_resource *data_heap; + +		struct nv04_vertex_program *active; + +		struct nv04_vertex_program *current; +		struct pipe_buffer *constant_buf; +	} vertprog; + +	struct { +		struct nv04_fragment_program *active; + +		struct nv04_fragment_program *current; +		struct pipe_buffer *constant_buf; +	} fragprog; + +	struct pipe_vertex_buffer  vtxbuf[PIPE_MAX_ATTRIBS]; +	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + +	struct pipe_viewport_state viewport; +}; + +static INLINE struct nv04_context * +nv04_context(struct pipe_context *pipe) +{ +	return (struct nv04_context *)pipe; +} + +extern void nv04_init_state_functions(struct nv04_context *nv04); +extern void nv04_init_surface_functions(struct nv04_context *nv04); +extern void nv04_screen_init_miptree_functions(struct pipe_screen 
*screen); + +/* nv04_clear.c */ +extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		       unsigned clearValue); + +/* nv04_draw.c */ +extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04); + +/* nv04_fragprog.c */ +extern void nv04_fragprog_bind(struct nv04_context *, +			       struct nv04_fragment_program *); +extern void nv04_fragprog_destroy(struct nv04_context *, +				  struct nv04_fragment_program *); + +/* nv04_fragtex.c */ +extern void nv04_fragtex_bind(struct nv04_context *); + +/* nv04_prim_vbuf.c */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ); + +/* nv04_state.c and friends */ +extern void nv04_emit_hw_state(struct nv04_context *nv04); +extern void nv04_state_tex_update(struct nv04_context *nv04); + +/* nv04_vbo.c */ +extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, +				unsigned start, unsigned count); +extern boolean nv04_draw_elements( struct pipe_context *pipe, +                    struct pipe_buffer *indexBuffer, +                    unsigned indexSize, +                    unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c new file mode 100644 index 0000000000..8a2af41fe0 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv04_context.h" + +void +nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp) +{ +} + +void +nv04_fragprog_destroy(struct nv04_context *nv04, +		      struct nv04_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c new file mode 100644 index 0000000000..21f990fd53 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -0,0 +1,73 @@ +#include "nv04_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf)                                                                \ +{                                                                              \ +  PIPE_FORMAT_##m,                                                             \ +  NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf,                                               \ +} + +struct nv04_texture_format { +	uint	pipe; +	int     format; +}; + +static struct nv04_texture_format +nv04_texture_formats[] = { +	_(A8R8G8B8_UNORM, A8R8G8B8), +	_(X8R8G8B8_UNORM, X8R8G8B8), +	_(A1R5G5B5_UNORM, A1R5G5B5), +	_(A4R4G4B4_UNORM, A4R4G4B4), +	_(L8_UNORM,       Y8      ), +	_(A8_UNORM,       Y8      ), +}; + +static uint32_t +nv04_fragtex_format(uint pipe_format) +{ +	struct nv04_texture_format *tf = nv04_texture_formats; +	int i; + +	for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) { +		if (tf->pipe == pipe_format) +			return tf->format; +		tf++; +	} + +	NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); +	return 0; +} + + +static void +nv04_fragtex_build(struct nv04_context *nv04, int unit) +{ +	struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; +	struct pipe_texture *pt = &nv04mt->base; + +	switch (pt->target) { +	case PIPE_TEXTURE_2D: +		break; +	default: +		NOUVEAU_ERR("Unknown target %d\n", pt->target); +		return; +	} + +	nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER  +		| 
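/* The FORMAT word's BASE_SIZE_U/V fields hold log2 of the texture dimensions
 * (hence the log2i() terms just below), which is one reason the screen
 * reports no NPOT texture support. */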
NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER +		| nv04_fragtex_format(pt->format) +		| ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) +		| ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) +		| ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) +		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE +		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE +		; +} + + +void +nv04_fragtex_bind(struct nv04_context *nv04) +{ +	nv04_fragtex_build(nv04, 0); +} + diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c new file mode 100644 index 0000000000..993c5ef5dd --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -0,0 +1,177 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_miptree_layout(struct nv04_miptree *nv04mt) +{ +	struct pipe_texture *pt = &nv04mt->base; +	uint width = pt->width[0], height = pt->height[0]; +	uint offset = 0; +	int nr_faces, l; + +	nr_faces = 1; + +	for (l = 0; l <= pt->last_level; l++) { +		pt->width[l] = width; +		pt->height[l] = height; + +		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); +		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); +		 +		nv04mt->level[l].pitch = pt->width[0]; +		nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; + +		width  = MAX2(1, width  >> 1); +		height = MAX2(1, height >> 1); +	} + +	for (l = 0; l <= pt->last_level; l++) { + +		nv04mt->level[l].image_offset = offset; +		offset += nv04mt->level[l].pitch * pt->height[l]; +	} + +	nv04mt->total_size = offset; +} + +static struct pipe_texture * +nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv04_miptree *mt; + +	mt = MALLOC(sizeof(struct nv04_miptree)); +	if (!mt) +		return NULL; +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->shadow_tex = NULL; +	mt->shadow_surface = NULL; + +	//mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + +	nv04_miptree_layout(mt); + +	mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL | +						NOUVEAU_BUFFER_USAGE_TEXTURE, +						mt->total_size); +	if (!mt->buffer) { +		printf("failed %d byte alloc\n",mt->total_size); +		FREE(mt); +		return NULL; +	} +	 +	return &mt->base; +} + +static struct pipe_texture * +nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +		     const unsigned *stride, struct pipe_buffer *pb) +{ +	struct nv04_miptree *mt; + +	/* Only supports 2D, non-mipmapped textures for the moment */ +	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || +	    pt->depth[0] != 1) +		return NULL; + +	mt = CALLOC_STRUCT(nv04_miptree); +	if (!mt) +		return NULL; + +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->level[0].pitch = stride[0]; +	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + +	pipe_buffer_reference(pscreen, &mt->buffer, pb); +	return &mt->base; +} + +static void +nv04_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ +	struct pipe_texture *pt = *ppt; +	struct nv04_miptree *mt = (struct nv04_miptree *)pt; +	int l; + +	*ppt = NULL; +	if (--pt->refcount) +		return; + +	pipe_buffer_reference(pscreen, &mt->buffer, NULL); +	for (l = 0; l <= pt->last_level; l++) { +		if (mt->level[l].image_offset) +			
FREE(mt->level[l].image_offset); +	} + +	if (mt->shadow_tex) { +		assert(mt->shadow_surface); +		pscreen->tex_surface_release(pscreen, &mt->shadow_surface); +		nv04_miptree_release(pscreen, &mt->shadow_tex); +	} + +	FREE(mt); +} + +static struct pipe_surface * +nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +			 unsigned face, unsigned level, unsigned zslice, +			 unsigned flags) +{ +	struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt; +	struct pipe_surface *ps; + +	ps = CALLOC_STRUCT(pipe_surface); +	if (!ps) +		return NULL; +	pipe_texture_reference(&ps->texture, pt); +	ps->format = pt->format; +	ps->width = pt->width[level]; +	ps->height = pt->height[level]; +	ps->block = pt->block; +	ps->nblocksx = pt->nblocksx[level]; +	ps->nblocksy = pt->nblocksy[level]; +	ps->stride = nv04mt->level[level].pitch; +	ps->usage = flags; +	ps->status = PIPE_SURFACE_STATUS_DEFINED; +	ps->refcount = 1; +	ps->face = face; +	ps->level = level; +	ps->zslice = zslice; + +	ps->offset = nv04mt->level[level].image_offset; + +	return ps; +} + +static void +nv04_miptree_surface_del(struct pipe_screen *pscreen, +			 struct pipe_surface **psurface) +{ +	struct pipe_surface *ps = *psurface; + +	*psurface = NULL; +	if (--ps->refcount > 0) +		return; + +	pipe_texture_reference(&ps->texture, NULL); +	FREE(ps); +} + +void +nv04_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ +	pscreen->texture_create = nv04_miptree_create; +	pscreen->texture_blanket = nv04_miptree_blanket; +	pscreen->texture_release = nv04_miptree_release; +	pscreen->get_tex_surface = nv04_miptree_surface_new; +	pscreen->tex_surface_release = nv04_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c new file mode 100644 index 0000000000..18a8872ae3 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -0,0 +1,309 @@ + +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_compiler.h" + +#include "draw/draw_vbuf.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +#define VERTEX_SIZE 40 +#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each + +/** + * Primitive renderer for nv04. + */ +struct nv04_vbuf_render { +	struct vbuf_render base; + +	struct nv04_context *nv04;    + +	/** Vertex buffer */ +	unsigned char* buffer; + +	/** Vertex size in bytes */ +	unsigned vertex_size; + +	/** Current primitive */ +	unsigned prim; +}; + + +/** + * Basically a cast wrapper. 
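 * (The downcast is safe because struct nv04_vbuf_render stores its
 * vbuf_render base as its first member, so the two pointers coincide.)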
+ */ +static INLINE struct nv04_vbuf_render * +nv04_vbuf_render( struct vbuf_render *render ) +{ +	assert(render); +	return (struct nv04_vbuf_render *)render; +} + + +static const struct vertex_info * +nv04_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ +	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); +	struct nv04_context *nv04 = nv04_render->nv04; +	return &nv04->vertex_info; +} + + +static void * +nv04_vbuf_render_allocate_vertices( struct vbuf_render *render, +		ushort vertex_size, +		ushort nr_vertices ) +{ +	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + +	nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE); +	assert(!nv04_render->buffer); + +	return nv04_render->buffer; +} + + +static boolean  +nv04_vbuf_render_set_primitive( struct vbuf_render *render,  +		unsigned prim ) +{ +	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + +	if (prim <= PIPE_PRIM_LINE_STRIP) +		return FALSE; + +	nv04_render->prim = prim; +	return TRUE; +} + +static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) +{ +	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); +	OUT_RINGp(buffer + VERTEX_SIZE * v0,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v1,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v2,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v3,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v4,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v5,8); +	OUT_RING(0xFEDCBA); +} + +static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) +{ +	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); +	OUT_RINGp(buffer + VERTEX_SIZE * v0,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v1,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v2,8); +	OUT_RING(0xFED); +} + +static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) +{ +	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); +	OUT_RINGp(buffer + VERTEX_SIZE * v0,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v1,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v2,8); +	OUT_RINGp(buffer + VERTEX_SIZE * v3,8); +	OUT_RING(0xFECEDC); +} + +static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) +{ +	unsigned char* buffer = render->buffer; +	struct nv04_context* nv04 = render->nv04; +	int i; + +	for( i=0; i< nr_indices-5; i+=6) +		nv04_2triangles(nv04, +				buffer, +				indices[i+0], +				indices[i+1], +				indices[i+2], +				indices[i+3], +				indices[i+4], +				indices[i+5] +			       ); +	if (i != nr_indices) +	{ +		nv04_1triangle(nv04, +				buffer, +				indices[i+0], +				indices[i+1], +				indices[i+2] +			       ); +		i+=3; +	} +	if (i != nr_indices) +		NOUVEAU_ERR("Houston, we have lost some vertices\n"); +} + +static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ +	const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; +	unsigned char* buffer = render->buffer; +	struct nv04_context* nv04 = render->nv04; +	int i,j; + +	for(i = 0; i<nr_indices; i+=14)  +	{ +		int numvert = MIN2(16, nr_indices - i); +		int numtri = numvert - 2; +		if (numvert<3) +			break; + +		BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); +		for(j = 0; j<numvert; j++) +			OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 
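/* Each TLVERTEX_DRAWPRIMITIVE word that follows packs up to two triangles as
 * six 4-bit indices into the 16-entry TLVERTEX window being loaded here;
 * striptbl[] pre-encodes that strip order, and the 0xFFF mask keeps only the
 * low three nibbles (one triangle) when the triangle count is odd. */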
8 ); + +		BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); +		for(j = 0; j<numtri/2; j++ ) +			OUT_RING(striptbl[j]); +		if (numtri%2) +			OUT_RING(striptbl[numtri/2]&0xFFF); +	} +} + +static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ +	const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; +	unsigned char* buffer = render->buffer; +	struct nv04_context* nv04 = render->nv04; +	int i,j; + +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); +	OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + +	for(i = 1; i<nr_indices; i+=14) +	{ +		int numvert=MIN2(15, nr_indices - i); +		int numtri=numvert-2; +		if (numvert < 3) +			break; + +		BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + +		for(j=0;j<numvert;j++) +			OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); + +		BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); +		for(j = 0; j<numtri/2; j++) +			OUT_RING(fantbl[j]); +		if (numtri%2) +			OUT_RING(fantbl[numtri/2]&0xFFF); +	} +} + +static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ +	unsigned char* buffer = render->buffer; +	struct nv04_context* nv04 = render->nv04; +	int i; + +	for(i = 0; i < nr_indices; i += 4) +		nv04_1quad(nv04, +				buffer, +				indices[i+0], +				indices[i+1], +				indices[i+2], +				indices[i+3] +			       ); +} + + +static void  +nv04_vbuf_render_draw( struct vbuf_render *render, +		const ushort *indices, +		uint nr_indices) +{ +	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + +	// emit the indices +	switch( nv04_render->prim ) +	{ +		case PIPE_PRIM_TRIANGLES: +			nv04_vbuf_render_triangles_elts(nv04_render, indices, nr_indices); +			break; +		case PIPE_PRIM_QUAD_STRIP: +		case PIPE_PRIM_TRIANGLE_STRIP: +			nv04_vbuf_render_tri_strip_elts(nv04_render, indices, nr_indices); +			break; +		case PIPE_PRIM_TRIANGLE_FAN: +		case PIPE_PRIM_POLYGON: +			nv04_vbuf_render_tri_fan_elts(nv04_render, indices, nr_indices); +			break; +		case PIPE_PRIM_QUADS: +			nv04_vbuf_render_quads_elts(nv04_render, indices, nr_indices); +			break; +		default: +			NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", nv04_render->prim); +			break; +	} +} + + +static void +nv04_vbuf_render_release_vertices( struct vbuf_render *render, +		void *vertices,  +		unsigned vertex_size, +		unsigned vertices_used ) +{ +	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + +	free(nv04_render->buffer); +	nv04_render->buffer = NULL; +} + + +static void +nv04_vbuf_render_destroy( struct vbuf_render *render ) +{ +	struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); +	FREE(nv04_render); +} + + +/** + * Create a new primitive render. 
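 * The callbacks wired up below are handed to draw_vbuf_stage(), which buffers
 * post-transform vertices and hands them back as ushort indices for the
 * TLVERTEX submission helpers above.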
+ */ +static struct vbuf_render * +nv04_vbuf_render_create( struct nv04_context *nv04 ) +{ +	struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render); + +	nv04_render->nv04 = nv04; + +	nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE; +	nv04_render->base.max_indices = 65536;  +	nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info; +	nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices; +	nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive; +	nv04_render->base.draw = nv04_vbuf_render_draw; +	nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices; +	nv04_render->base.destroy = nv04_vbuf_render_destroy; + +	return &nv04_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ) +{ +	struct vbuf_render *render; +	struct draw_stage *stage; + +	render = nv04_vbuf_render_create(nv04); +	if(!render) +		return NULL; + +	stage = draw_vbuf_stage( nv04->draw, render ); +	if(!stage) { +		render->destroy(render); +		return NULL; +	} + +	return stage; +} diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c new file mode 100644 index 0000000000..9ef38bc244 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -0,0 +1,237 @@ +#include "pipe/p_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_simple_screen.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static const char * +nv04_screen_get_name(struct pipe_screen *screen) +{ +	struct nv04_screen *nv04screen = nv04_screen(screen); +	struct nouveau_device *dev = nv04screen->nvws->channel->device; +	static char buffer[128]; + +	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); +	return buffer; +} + +static const char * +nv04_screen_get_vendor(struct pipe_screen *screen) +{ +	return "nouveau"; +} + +static int +nv04_screen_get_param(struct pipe_screen *screen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +		return 1; +	case PIPE_CAP_NPOT_TEXTURES: +		return 0; +	case PIPE_CAP_TWO_SIDED_STENCIL: +		return 0; +	case PIPE_CAP_GLSL: +		return 0; +	case PIPE_CAP_S3TC: +		return 0; +	case PIPE_CAP_ANISOTROPIC_FILTER: +		return 0; +	case PIPE_CAP_POINT_SPRITE: +		return 0; +	case PIPE_CAP_MAX_RENDER_TARGETS: +		return 1; +	case PIPE_CAP_OCCLUSION_QUERY: +		return 0; +	case PIPE_CAP_TEXTURE_SHADOW_MAP: +		return 0; +	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +		return 10; +	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +		return 0; +	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +		return 0; +	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +		return 0; +	case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +		return 0; +	case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +		return 1; +	case NOUVEAU_CAP_HW_VTXBUF: +	case NOUVEAU_CAP_HW_IDXBUF: +		return 0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0; +	} +} + +static float +nv04_screen_get_paramf(struct pipe_screen *screen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_LINE_WIDTH: +	case PIPE_CAP_MAX_LINE_WIDTH_AA: +		return 0.0; +	case PIPE_CAP_MAX_POINT_WIDTH: +	case PIPE_CAP_MAX_POINT_WIDTH_AA: +		return 0.0; +	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +		return 0.0; +	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +		return 0.0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0.0; +	} +} + +static boolean +nv04_screen_is_format_supported(struct pipe_screen *screen, +				enum pipe_format format, +				enum pipe_texture_target target, +				unsigned 
tex_usage, unsigned geom_flags) +{ +	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_Z16_UNORM: +			return TRUE; +		default: +			break; +		} +	} else { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_X8R8G8B8_UNORM: +		case PIPE_FORMAT_A1R5G5B5_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_L8_UNORM: +		case PIPE_FORMAT_A8_UNORM: +			return TRUE; +		default: +			break; +		} +	} + +	return FALSE; +} + +static void * +nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, +		 unsigned flags ) +{ +	void *map; +	struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; + +	map = pipe_buffer_map(screen, nv04mt->buffer, flags); +	if (!map) +		return NULL; + +	return map + surface->offset; +} + +static void +nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ +	struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; + +	pipe_buffer_unmap(screen, nv04mt->buffer); +} + +static void +nv04_screen_destroy(struct pipe_screen *pscreen) +{ +	struct nv04_screen *screen = nv04_screen(pscreen); +	struct nouveau_winsys *nvws = screen->nvws; + +	nvws->notifier_free(&screen->sync); +	nvws->grobj_free(&screen->fahrenheit); +	nv04_surface_2d_takedown(&screen->eng2d); + +	FREE(pscreen); +} + +static struct pipe_buffer * +nv04_surface_buffer(struct pipe_surface *surf) +{ +	struct nv04_miptree *mt = (struct nv04_miptree *)surf->texture; + +	return mt->buffer; +} + +struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ +	struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen); +	unsigned fahrenheit_class = 0, sub3d_class = 0; +	unsigned chipset = nvws->channel->device->chipset; +	int ret; + +	if (!screen) +		return NULL; +	screen->nvws = nvws; + +	if (chipset>=0x20) { +		fahrenheit_class = 0; +		sub3d_class = 0; +	} else if (chipset>=0x10) { +		fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; +		sub3d_class = NV10_CONTEXT_SURFACES_3D; +	} else { +		fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; +		sub3d_class = NV04_CONTEXT_SURFACES_3D; +	} + +	if (!fahrenheit_class) { +		NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", chipset); +		return NULL; +	} + +	/* 2D engine setup */ +	screen->eng2d = nv04_surface_2d_init(nvws); +	screen->eng2d->buf = nv04_surface_buffer; + +	/* 3D object */ +	ret = nvws->grobj_alloc(nvws, fahrenheit_class, &screen->fahrenheit); +	if (ret) { +		NOUVEAU_ERR("Error creating 3D object: %d\n", ret); +		return NULL; +	} + +	/* 3D surface object */ +	ret = nvws->grobj_alloc(nvws, sub3d_class, &screen->context_surfaces_3d); +	if (ret) { +		NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret); +		return NULL; +	} + +	/* Notifier for sync purposes */ +	ret = nvws->notifier_alloc(nvws, 1, &screen->sync); +	if (ret) { +		NOUVEAU_ERR("Error creating notifier object: %d\n", ret); +		nv04_screen_destroy(&screen->pipe); +		return NULL; +	} + +	screen->pipe.winsys = ws; +	screen->pipe.destroy = nv04_screen_destroy; + +	screen->pipe.get_name = nv04_screen_get_name; +	screen->pipe.get_vendor = nv04_screen_get_vendor; +	screen->pipe.get_param = nv04_screen_get_param; +	screen->pipe.get_paramf = nv04_screen_get_paramf; + +	screen->pipe.is_format_supported = nv04_screen_is_format_supported; + +	screen->pipe.surface_map = nv04_surface_map; +	screen->pipe.surface_unmap = nv04_surface_unmap; + +	
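/*
 * nv04_surface_map() above returns the buffer mapping already advanced by
 * surface->offset, so callers index the returned pointer directly using the
 * surface stride.  A minimal usage sketch through the hooks installed above;
 * the helper is illustrative only and assumes a CPU-mappable 32bpp surface
 * and that PIPE_BUFFER_USAGE_CPU_WRITE is the appropriate mapping flag.
 */
static void
poke_pixel(struct pipe_screen *screen, struct pipe_surface *surf,
	   unsigned x, unsigned y, uint32_t value)
{
	uint32_t *map = screen->surface_map(screen, surf,
					    PIPE_BUFFER_USAGE_CPU_WRITE);
	if (!map)
		return;
	map[y * (surf->stride / 4) + x] = value;  /* offset already applied */
	screen->surface_unmap(screen, surf);
}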
nv04_screen_init_miptree_functions(&screen->pipe); +	u_simple_screen_init(&screen->pipe); + +	return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h new file mode 100644 index 0000000000..540aec907b --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.h @@ -0,0 +1,27 @@ +#ifndef __NV04_SCREEN_H__ +#define __NV04_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04_surface_2d.h" + +struct nv04_screen { +	struct pipe_screen pipe; + +	struct nouveau_winsys *nvws; +	unsigned chipset; + +	/* HW graphics objects */ +	struct nv04_surface_2d *eng2d; +	struct nouveau_grobj *fahrenheit; +	struct nouveau_grobj *context_surfaces_3d; +	struct nouveau_notifier *sync; + +}; + +static INLINE struct nv04_screen * +nv04_screen(struct pipe_screen *screen) +{ +	return (struct nv04_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c new file mode 100644 index 0000000000..87c635f962 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -0,0 +1,458 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +static void * +nv04_blend_state_create(struct pipe_context *pipe, +			const struct pipe_blend_state *cso) +{ +	struct nv04_blend_state *cb; + +	cb = MALLOC(sizeof(struct nv04_blend_state)); + +	cb->b_enable = cso->blend_enable ? 1 : 0; +	cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | +			 (nvgl_blend_func(cso->rgb_src_factor))); +	cb->b_dst = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | +			 (nvgl_blend_func(cso->rgb_dst_factor))); +	 + +	return (void *)cb; +} + +static void +nv04_blend_state_bind(struct pipe_context *pipe, void *blend) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	nv04->blend = (struct nv04_blend_state*)blend; + +	nv04->dirty |= NV04_NEW_BLEND; +} + +static void +nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	free(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { +	unsigned ret; + +	switch (wrap) { +	case PIPE_TEX_WRAP_REPEAT: +		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; +		break; +	case PIPE_TEX_WRAP_MIRROR_REPEAT: +		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_CLAMP: +		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +	case PIPE_TEX_WRAP_MIRROR_CLAMP: +	default: +		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; +	} +	return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; +} + +static void * +nv04_sampler_state_create(struct pipe_context *pipe, +			  const struct pipe_sampler_state *cso) +{ + +	struct nv04_sampler_state *ss; +	uint32_t filter = 0; + +	ss = MALLOC(sizeof(struct nv04_sampler_state)); + +	ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | +		    (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + +	if (cso->max_anisotropy > 1.0) { +		filter |= 
NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; +	} + +	switch (cso->mag_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; +		break; +	} + +	switch (cso->min_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; +			break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; +			break; +		} +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; +		break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; +			break; +		} +		break; +	} + +	ss->filter = filter; + +	return (void *)ss; +} + +static void +nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv04->sampler[unit] = sampler[unit]; +		nv04->dirty_samplers |= (1 << unit); +	} +} + +static void +nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	free(hwcso); +} + +static void +nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr, +			 struct pipe_texture **miptree) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv04->tex_miptree[unit] = (struct nv04_miptree *)miptree[unit]; +		nv04->dirty_samplers |= (1 << unit); +	} +} + +static void * +nv04_rasterizer_state_create(struct pipe_context *pipe, +			     const struct pipe_rasterizer_state *cso) +{ +	struct nv04_rasterizer_state *rs; + +	/*XXX: ignored: +	 * 	scissor +	 * 	points/lines (no hw support, emulated with tris in gallium) +	 */ +	rs = MALLOC(sizeof(struct nv04_rasterizer_state)); + +	rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + +	return (void *)rs; +} + +static void +nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	nv04->rast = (struct nv04_rasterizer_state*)rast; + +	draw_set_rasterizer_state(nv04->draw, (nv04->rast ? 
nv04->rast->templ : NULL)); + +	nv04->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND; +} + +static void +nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	free(hwcso); +} + +static INLINE uint32_t nv04_compare_func(uint32_t f) +{ +	switch ( f ) { +		case PIPE_FUNC_NEVER:		return 1; +		case PIPE_FUNC_LESS:		return 2; +		case PIPE_FUNC_EQUAL:		return 3; +		case PIPE_FUNC_LEQUAL:		return 4; +		case PIPE_FUNC_GREATER:		return 5; +		case PIPE_FUNC_NOTEQUAL:	return 6; +		case PIPE_FUNC_GEQUAL:		return 7; +		case PIPE_FUNC_ALWAYS:		return 8; +	} +	NOUVEAU_MSG("Unable to find the function\n"); +	return 0; +} + +static void * +nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, +			const struct pipe_depth_stencil_alpha_state *cso) +{ +	struct nv04_depth_stencil_alpha_state *hw; + +	hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); + +	hw->control = float_to_ubyte(cso->alpha.ref_value); +	hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); +	hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; +	hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; +	hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; +	hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); +	hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module +	hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; +	hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; +	hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; +	hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + +	return (void *)hw; +} + +static void +nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	nv04->dsa = hwcso; +	nv04->dirty |= NV04_NEW_CONTROL; +} + +static void +nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	free(hwcso); +} + +static void * +nv04_vp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *templ) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	return draw_create_vertex_shader(nv04->draw, templ); +} + +static void +nv04_vp_state_bind(struct pipe_context *pipe, void *shader) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	draw_bind_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); + +	nv04->dirty |= NV04_NEW_VERTPROG; +} + +static void +nv04_vp_state_delete(struct pipe_context *pipe, void *shader) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	draw_delete_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv04_fp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv04_fragment_program *fp; + +	fp = CALLOC(1, sizeof(struct nv04_fragment_program)); +	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + +	return (void *)fp; +} + +static void +nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	struct nv04_fragment_program *fp = hwcso; + +	nv04->fragprog.current = fp; +	nv04->dirty |= NV04_NEW_FRAGPROG; +} + +static void +nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv04_context *nv04 = 
nv04_context(pipe); +	struct nv04_fragment_program *fp = hwcso; + +	nv04_fragprog_destroy(nv04, fp); +	free((void*)fp->pipe.tokens); +	free(fp); +} + +static void +nv04_set_blend_color(struct pipe_context *pipe, +		     const struct pipe_blend_color *bcol) +{ +} + +static void +nv04_set_clip_state(struct pipe_context *pipe, +		    const struct pipe_clip_state *clip) +{ +} + +static void +nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +			 const struct pipe_constant_buffer *buf ) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	struct pipe_winsys *ws = pipe->winsys; + +	assert(shader < PIPE_SHADER_TYPES); +	assert(index == 0); + +	if (buf) { +		void *mapped; +		if (buf->buffer && buf->buffer->size && +                    (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) +		{ +			memcpy(nv04->constbuf[shader], mapped, buf->buffer->size); +			nv04->constbuf_nr[shader] = +				buf->buffer->size / (4 * sizeof(float)); +			ws->buffer_unmap(ws, buf->buffer); +		} +	} +} + +static void +nv04_set_framebuffer_state(struct pipe_context *pipe, +			   const struct pipe_framebuffer_state *fb) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	 +	nv04->framebuffer = (struct pipe_framebuffer_state*)fb; + +	nv04->dirty |= NV04_NEW_FRAMEBUFFER; +} +static void +nv04_set_polygon_stipple(struct pipe_context *pipe, +			 const struct pipe_poly_stipple *stipple) +{ +	NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv04_set_scissor_state(struct pipe_context *pipe, +		       const struct pipe_scissor_state *s) +{ +/*	struct nv04_context *nv04 = nv04_context(pipe); + +	// XXX +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); +	OUT_RING  (((s->maxx - s->minx) << 16) | s->minx); +	OUT_RING  (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void +nv04_set_viewport_state(struct pipe_context *pipe, +			const struct pipe_viewport_state *viewport) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	nv04->viewport = *viewport; + +	draw_set_viewport_state(nv04->draw, &nv04->viewport); +} + +static void +nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count, +		       const struct pipe_vertex_buffer *buffers) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	memcpy(nv04->vtxbuf, buffers, count * sizeof(buffers[0])); +	nv04->dirty |= NV04_NEW_VTXARRAYS; + +	draw_set_vertex_buffers(nv04->draw, count, buffers); +} + +static void +nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count, +			const struct pipe_vertex_element *elements) +{ +	struct nv04_context *nv04 = nv04_context(pipe); + +	memcpy(nv04->vtxelt, elements, sizeof(*elements) * count); +	nv04->dirty |= NV04_NEW_VTXARRAYS; + +	draw_set_vertex_elements(nv04->draw, count, elements); +} + +void +nv04_init_state_functions(struct nv04_context *nv04) +{ +	nv04->pipe.create_blend_state = nv04_blend_state_create; +	nv04->pipe.bind_blend_state = nv04_blend_state_bind; +	nv04->pipe.delete_blend_state = nv04_blend_state_delete; + +	nv04->pipe.create_sampler_state = nv04_sampler_state_create; +	nv04->pipe.bind_sampler_states = nv04_sampler_state_bind; +	nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; +	nv04->pipe.set_sampler_textures = nv04_set_sampler_texture; + +	nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; +	nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind; +	nv04->pipe.delete_rasterizer_state = nv04_rasterizer_state_delete; + +	nv04->pipe.create_depth_stencil_alpha_state = 
nv04_depth_stencil_alpha_state_create; +	nv04->pipe.bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind; +	nv04->pipe.delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete; + +	nv04->pipe.create_vs_state = nv04_vp_state_create; +	nv04->pipe.bind_vs_state = nv04_vp_state_bind; +	nv04->pipe.delete_vs_state = nv04_vp_state_delete; + +	nv04->pipe.create_fs_state = nv04_fp_state_create; +	nv04->pipe.bind_fs_state = nv04_fp_state_bind; +	nv04->pipe.delete_fs_state = nv04_fp_state_delete; + +	nv04->pipe.set_blend_color = nv04_set_blend_color; +	nv04->pipe.set_clip_state = nv04_set_clip_state; +	nv04->pipe.set_constant_buffer = nv04_set_constant_buffer; +	nv04->pipe.set_framebuffer_state = nv04_set_framebuffer_state; +	nv04->pipe.set_polygon_stipple = nv04_set_polygon_stipple; +	nv04->pipe.set_scissor_state = nv04_set_scissor_state; +	nv04->pipe.set_viewport_state = nv04_set_viewport_state; + +	nv04->pipe.set_vertex_buffers = nv04_set_vertex_buffers; +	nv04->pipe.set_vertex_elements = nv04_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h new file mode 100644 index 0000000000..15d4685ec1 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -0,0 +1,74 @@ +#ifndef __NV04_STATE_H__ +#define __NV04_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv04_blend_state { +	uint32_t b_enable; +	uint32_t b_src; +	uint32_t b_dst; +}; + +struct nv04_fragtex_state { +	uint32_t format; +}; + +struct nv04_sampler_state { +	uint32_t filter; +	uint32_t format; +}; + +struct nv04_depth_stencil_alpha_state { +	uint32_t control; +}; + +struct nv04_rasterizer_state { +	uint32_t blend; + +	const struct pipe_rasterizer_state *templ; +}; + +struct nv04_miptree { +	struct pipe_texture base; + +	struct pipe_buffer *buffer; +	uint total_size; + +	struct pipe_texture *shadow_tex; +	struct pipe_surface *shadow_surface; + +	struct { +		uint pitch; +		uint image_offset; +	} level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +struct nv04_fragment_program_data { +	unsigned offset; +	unsigned index; +}; + +struct nv04_fragment_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; + +	boolean translated; +	boolean on_hw; +	unsigned samplers; + +	uint32_t *insn; +	int       insn_len; + +	struct nv04_fragment_program_data *consts; +	unsigned nr_consts; + +	struct pipe_buffer *buffer; + +	uint32_t fp_control; +	uint32_t fp_reg_control; +}; + + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c new file mode 100644 index 0000000000..bd8ef1adbf --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -0,0 +1,223 @@ +#include "nv04_context.h" +#include "nv04_state.h" + +static void nv04_vertex_layout(struct pipe_context* pipe) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	struct nv04_fragment_program *fp = nv04->fragprog.current; +	uint32_t src = 0; +	int i; +	struct vertex_info vinfo; + +	memset(&vinfo, 0, sizeof(vinfo)); + +	for (i = 0; i < fp->info.num_inputs; i++) { +		int isn = fp->info.input_semantic_name[i]; +		int isi = fp->info.input_semantic_index[i]; +		switch (isn) { +			case TGSI_SEMANTIC_POSITION: +				draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); +				break; +			case TGSI_SEMANTIC_COLOR: +				draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); +				break; +			default: +			case TGSI_SEMANTIC_GENERIC: +				draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); +				
break; +			case TGSI_SEMANTIC_FOG: +				draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); +				break; +		} +	} + +	printf("%d vertex input\n",fp->info.num_inputs); +	draw_compute_vertex_size(&vinfo); +} + +static uint32_t nv04_blend_func(uint32_t f) +{ +	switch ( f ) { +		case PIPE_BLENDFACTOR_ZERO:			return 0x1; +		case PIPE_BLENDFACTOR_ONE:			return 0x2; +		case PIPE_BLENDFACTOR_SRC_COLOR:		return 0x3; +		case PIPE_BLENDFACTOR_INV_SRC_COLOR:		return 0x4; +		case PIPE_BLENDFACTOR_SRC_ALPHA:		return 0x5; +		case PIPE_BLENDFACTOR_INV_SRC_ALPHA:		return 0x6; +		case PIPE_BLENDFACTOR_DST_ALPHA:		return 0x7; +		case PIPE_BLENDFACTOR_INV_DST_ALPHA:		return 0x8; +		case PIPE_BLENDFACTOR_DST_COLOR:		return 0x9; +		case PIPE_BLENDFACTOR_INV_DST_COLOR:		return 0xA; +		case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:	return 0xB; +	} +	NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f); +	return 0; +} + +static void nv04_emit_control(struct nv04_context* nv04) +{ +	uint32_t control = nv04->dsa->control; + +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); +	OUT_RING(control); +} + +static void nv04_emit_blend(struct nv04_context* nv04) +{ +	uint32_t blend; + +	blend=0x4; // texture MODULATE_ALPHA +	blend|=0x20; // alpha is MSB +	blend|=(2<<6); // flat shading +	blend|=(1<<8); // persp correct +	blend|=(0<<16); // no fog +	blend|=(nv04->blend->b_enable<<20); +	blend|=(nv04_blend_func(nv04->blend->b_src)<<24); +	blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); + +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); +	OUT_RING(blend); +} + +static void nv04_emit_sampler(struct nv04_context *nv04, int unit) +{ +	struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; +	struct pipe_texture *pt = &nv04mt->base; + +	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); +	OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); +	OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); +	OUT_RING(nv04->sampler[unit]->filter); +} + +static void nv04_state_emit_framebuffer(struct nv04_context* nv04) +{ +	struct pipe_framebuffer_state* fb = nv04->framebuffer; +	struct pipe_surface *rt, *zeta; +	uint32_t rt_format, w, h; +	int colour_format = 0, zeta_format = 0; +	struct nv04_miptree *nv04mt = 0; + +	w = fb->cbufs[0]->width; +	h = fb->cbufs[0]->height; +	colour_format = fb->cbufs[0]->format; +	rt = fb->cbufs[0]; + +	if (fb->zsbuf) { +		if (colour_format) { +			assert(w == fb->zsbuf->width); +			assert(h == fb->zsbuf->height); +		} else { +			w = fb->zsbuf->width; +			h = fb->zsbuf->height; +		} + +		zeta_format = fb->zsbuf->format; +		zeta = fb->zsbuf; +	} + +	switch (colour_format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case 0: +		rt_format = 0x108; +		break; +	case PIPE_FORMAT_R5G6B5_UNORM: +		rt_format = 0x103; +		break; +	default: +		assert(0); +	} + +	BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); +	OUT_RING(rt_format); + +	nv04mt = (struct nv04_miptree *)rt->texture; +	/* FIXME pitches have to be aligned ! 
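/*
 * Regarding the FIXME above: the 2D blit path in nv04_surface_2d.c falls
 * back to M2MF whenever a surface offset or stride is not 64-byte aligned,
 * so a conservative reading is that the 3D surface pitch wants the same
 * alignment.  A sketch of the missing rounding; the 64-byte figure is an
 * assumption inferred from those checks, not documented here:
 */
static INLINE unsigned
nv04_align_pitch(unsigned pitch)
{
	return (pitch + 63) & ~63;
}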
*/ +	BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); +	OUT_RING(rt->stride|(zeta->stride<<16)); +	OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	if (fb->zsbuf) { +		nv04mt = (struct nv04_miptree *)zeta->texture; +		BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); +		OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	} +} + +void +nv04_emit_hw_state(struct nv04_context *nv04) +{ +	int i; + +	if (nv04->dirty & NV04_NEW_VERTPROG) { +		//nv04_vertprog_bind(nv04, nv04->vertprog.current); +		nv04->dirty &= ~NV04_NEW_VERTPROG; +	} + +	if (nv04->dirty & NV04_NEW_FRAGPROG) { +		nv04_fragprog_bind(nv04, nv04->fragprog.current); +		nv04->dirty &= ~NV04_NEW_FRAGPROG; +		nv04->dirty_samplers |= (1<<10); +		nv04->dirty_samplers = 0; +	} + +	if (nv04->dirty & NV04_NEW_CONTROL) { +		nv04->dirty &= ~NV04_NEW_CONTROL; + +		BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); +		OUT_RING(nv04->dsa->control); +	} + +	if (nv04->dirty & NV04_NEW_BLEND) { +		nv04->dirty &= ~NV04_NEW_BLEND; + +		nv04_emit_blend(nv04); +	} + +	if (nv04->dirty & NV04_NEW_VTXARRAYS) { +		nv04->dirty &= ~NV04_NEW_VTXARRAYS; +		nv04_vertex_layout(nv04); +	} + +	if (nv04->dirty & NV04_NEW_SAMPLER) { +		nv04->dirty &= ~NV04_NEW_SAMPLER; + +		nv04_emit_sampler(nv04, 0); +	} + +	if (nv04->dirty & NV04_NEW_VIEWPORT) { +		nv04->dirty &= ~NV04_NEW_VIEWPORT; +//		nv04_state_emit_viewport(nv04); +	} + + 	if (nv04->dirty & NV04_NEW_FRAMEBUFFER) { +		nv04->dirty &= ~NV04_NEW_FRAMEBUFFER; +		nv04_state_emit_framebuffer(nv04); +	} + +	/* Emit relocs for every referenced buffer. +	 * This is to ensure the bufmgr has an accurate idea of how +	 * the buffer is used.  This isn't very efficient, but we don't +	 * seem to take a significant performance hit.  Will be improved +	 * at some point.  Vertex arrays are emitted by nv04_vbo.c +	 */ + +	/* Render target */ +	BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); +	OUT_RING(nv04->rt->stride|(nv04->zeta->stride<<16)); +	OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	if (nv04->zeta) { +		BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); +		OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	} + +	/* Texture images */ +	for (i = 0; i < 1; i++) { +		if (!(nv04->fp_samplers & (1 << i))) +			continue; +		struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; +		BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); +		OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); +		OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); +	} +} + diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c new file mode 100644 index 0000000000..14abf16679 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "nv04_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv04_surface_copy(struct pipe_context *pipe, boolean do_flip, +		  struct pipe_surface *dest, unsigned destx, unsigned desty, +		  struct pipe_surface *src, unsigned srcx, unsigned srcy, +		  unsigned width, unsigned height) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	struct nv04_surface_2d *eng2d = nv04->screen->eng2d; + +	if (do_flip) { +		desty += height; +		while (height--) { +			eng2d->copy(eng2d, dest, destx, desty--, src, +				    srcx, srcy++, width, 1); +		} +		return; +	} + +	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +		  unsigned destx, unsigned desty, unsigned width, +		  unsigned height, unsigned value) +{ +	struct nv04_context *nv04 = nv04_context(pipe); +	struct nv04_surface_2d *eng2d = nv04->screen->eng2d; + +	eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv04_init_surface_functions(struct nv04_context *nv04) +{ +	nv04->pipe.surface_copy = nv04_surface_copy; +	nv04->pipe.surface_fill = nv04_surface_fill; +} diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c new file mode 100644 index 0000000000..230cfd17dd --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -0,0 +1,448 @@ +#include "pipe/p_context.h" +#include "pipe/p_format.h" +#include "util/u_memory.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_util.h" +#include "nv04_surface_2d.h" + +static INLINE int +nv04_surface_format(enum pipe_format format) +{ +	switch (format) { +	case PIPE_FORMAT_A8_UNORM: +		return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; +	case PIPE_FORMAT_R16_SNORM: +	case PIPE_FORMAT_R5G6B5_UNORM: +		return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; +	case PIPE_FORMAT_X8R8G8B8_UNORM: +	case PIPE_FORMAT_A8R8G8B8_UNORM: +		return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8; +	case PIPE_FORMAT_Z24S8_UNORM: +		return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; +	default: +		return -1; +	} +} + +static INLINE int +nv04_rect_format(enum pipe_format format) +{ +	switch (format) { +	case PIPE_FORMAT_A8_UNORM: +		return 
NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; +	case PIPE_FORMAT_R5G6B5_UNORM: +		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case PIPE_FORMAT_Z24S8_UNORM: +		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; +	default: +		return -1; +	} +} + +static INLINE int +nv04_scaled_image_format(enum pipe_format format) +{ +	switch (format) { +	case PIPE_FORMAT_A1R5G5B5_UNORM: +		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; +	case PIPE_FORMAT_A8R8G8B8_UNORM: +		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; +	case PIPE_FORMAT_X8R8G8B8_UNORM: +		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; +	case PIPE_FORMAT_R5G6B5_UNORM: +	case PIPE_FORMAT_R16_SNORM: +		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; +	default: +		return -1; +	} +} + +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y) +{ +	unsigned u = (x & 0x001) << 0 | +	             (x & 0x002) << 1 | +	             (x & 0x004) << 2 | +	             (x & 0x008) << 3 | +	             (x & 0x010) << 4 | +	             (x & 0x020) << 5 | +	             (x & 0x040) << 6 | +	             (x & 0x080) << 7 | +	             (x & 0x100) << 8 | +	             (x & 0x200) << 9 | +	             (x & 0x400) << 10 | +	             (x & 0x800) << 11; + +	unsigned v = (y & 0x001) << 1 | +	             (y & 0x002) << 2 | +	             (y & 0x004) << 3 | +	             (y & 0x008) << 4 | +	             (y & 0x010) << 5 | +	             (y & 0x020) << 6 | +	             (y & 0x040) << 7 | +	             (y & 0x080) << 8 | +	             (y & 0x100) << 9 | +	             (y & 0x200) << 10 | +	             (y & 0x400) << 11 | +	             (y & 0x800) << 12; +	return v | u; +} + +static int +nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, +			  struct pipe_surface *dst, int dx, int dy, +			  struct pipe_surface *src, int sx, int sy, +			  int w, int h) +{ +	struct nouveau_channel *chan = ctx->nvws->channel; +	struct nouveau_grobj *swzsurf = ctx->swzsurf; +	struct nouveau_grobj *sifm = ctx->sifm; +	struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src)); +	struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); +	const unsigned max_w = 1024; +	const unsigned max_h = 1024; +	const unsigned sub_w = w > max_w ? max_w : w; +	const unsigned sub_h = h > max_h ? 
max_h : h; +	unsigned cx; +	unsigned cy; + +	/* POT or GTFO */ +	assert(!(w & (w - 1)) && !(h & (h - 1))); + +	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); +	OUT_RELOCo(chan, dst_bo, +	                 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); +	OUT_RING  (chan, nv04_surface_format(dst->format) | +	                 log2i(w) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | +	                 log2i(h) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); +  +	BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); +	OUT_RELOCo(chan, src_bo, +	                 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +	BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); +	OUT_RING  (chan, swzsurf->handle); + +	for (cy = 0; cy < h; cy += sub_h) { +	  for (cx = 0; cx < w; cx += sub_w) { +	    BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); +	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx, cy) * +			     dst->block.size, NOUVEAU_BO_GART | +			     NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	    BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); +	    OUT_RING  (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); +	    OUT_RING  (chan, nv04_scaled_image_format(src->format)); +	    OUT_RING  (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); +	    OUT_RING  (chan, 0); +	    OUT_RING  (chan, sub_h << 16 | sub_w); +	    OUT_RING  (chan, 0); +	    OUT_RING  (chan, sub_h << 16 | sub_w); +	    OUT_RING  (chan, 1 << 20); +	    OUT_RING  (chan, 1 << 20); + +	    BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); +	    OUT_RING  (chan, sub_h << 16 | sub_w); +	    OUT_RING  (chan, src->stride | +			     NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | +			     NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); +	    OUT_RELOCl(chan, src_bo, src->offset + cy * src->stride + +			     cx * src->block.size, NOUVEAU_BO_GART | +			     NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +	    OUT_RING  (chan, 0); +	  } +	} + +	return 0; +} + +static int +nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, +		       struct pipe_surface *dst, int dx, int dy, +		       struct pipe_surface *src, int sx, int sy, int w, int h) +{ +	struct nouveau_channel *chan = ctx->nvws->channel; +	struct nouveau_grobj *m2mf = ctx->m2mf; +	struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src)); +	struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); +	unsigned dst_offset, src_offset; + +	dst_offset = dst->offset + (dy * dst->stride) + (dx * dst->block.size); +	src_offset = src->offset + (sy * src->stride) + (sx * src->block.size); + +	WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9); +	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); +	OUT_RELOCo(chan, src_bo, +		   NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +	OUT_RELOCo(chan, dst_bo, +		   NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	while (h) { +		int count = (h > 2047) ? 
2047 : h; + +		BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); +		OUT_RELOCl(chan, src_bo, src_offset, +			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); +		OUT_RELOCl(chan, dst_bo, dst_offset, +			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); +		OUT_RING  (chan, src->stride); +		OUT_RING  (chan, dst->stride); +		OUT_RING  (chan, w * src->block.size); +		OUT_RING  (chan, count); +		OUT_RING  (chan, 0x0101); +		OUT_RING  (chan, 0); + +		h -= count; +		src_offset += src->stride * count; +		dst_offset += dst->stride * count; +	} + +	return 0; +} + +static int +nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, +		       int dx, int dy, struct pipe_surface *src, int sx, int sy, +		       int w, int h) +{ +	struct nouveau_channel *chan = ctx->nvws->channel; +	struct nouveau_grobj *surf2d = ctx->surf2d; +	struct nouveau_grobj *blit = ctx->blit; +	struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src)); +	struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); +	int format; + +	format = nv04_surface_format(dst->format); +	if (format < 0) +		return 1; + +	WAIT_RING (chan, 12); +	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); +	OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); +	OUT_RING  (chan, format); +	OUT_RING  (chan, (dst->stride << 16) | src->stride); +	OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); +	OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	BEGIN_RING(chan, blit, 0x0300, 3); +	OUT_RING  (chan, (sy << 16) | sx); +	OUT_RING  (chan, (dy << 16) | dx); +	OUT_RING  (chan, ( h << 16) |  w); + +	return 0; +} + +static void +nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, +		  int dx, int dy, struct pipe_surface *src, int sx, int sy, +		  int w, int h) +{ +	int src_linear = src->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; +	int dst_linear = dst->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; + +	assert(src->format == dst->format); + +	/* Setup transfer to swizzle the texture to vram if needed */ +	if (src_linear && !dst_linear && w > 1 && h > 1) { +		nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); +		return; +	} + +	/* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback +	 * to NV_MEMORY_TO_MEMORY_FORMAT in this case. 
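/*
 * nv04_swizzle_bits() above is a 2-D Morton (Z-order) interleave: bit i of
 * x lands in output bit 2*i and bit i of y in bit 2*i+1, which is how the
 * destination offset inside a swizzled surface is derived from (cx, cy).
 * An equivalent loop-based form, for clarity (illustrative only):
 */
static unsigned
morton_2d(unsigned x, unsigned y)
{
	unsigned out = 0;
	unsigned bit;

	for (bit = 0; bit < 12; bit++) {
		out |= ((x >> bit) & 1) << (2 * bit);
		out |= ((y >> bit) & 1) << (2 * bit + 1);
	}
	return out;   /* matches nv04_swizzle_bits(x, y) for x, y < 4096 */
}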
+	 */ +	if ((src->offset & 63) || (dst->offset & 63) || +	    (src->stride & 63) || (dst->stride & 63)) { +		nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h); +		return; +	} + +	nv04_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h); +} + +static void +nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst, +		  int dx, int dy, int w, int h, unsigned value) +{ +	struct nouveau_channel *chan = ctx->nvws->channel; +	struct nouveau_grobj *surf2d = ctx->surf2d; +	struct nouveau_grobj *rect = ctx->rect; +	struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); +	int cs2d_format, gdirect_format; + +	cs2d_format = nv04_surface_format(dst->format); +	assert(cs2d_format >= 0); + +	gdirect_format = nv04_rect_format(dst->format); +	assert(gdirect_format >= 0); + +	WAIT_RING (chan, 16); +	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); +	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); +	OUT_RING  (chan, cs2d_format); +	OUT_RING  (chan, (dst->stride << 16) | dst->stride); +	OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); +	OUT_RING  (chan, gdirect_format); +	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); +	OUT_RING  (chan, value); +	BEGIN_RING(chan, rect, +		   NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); +	OUT_RING  (chan, (dx << 16) | dy); +	OUT_RING  (chan, ( w << 16) |  h); +} + +void +nv04_surface_2d_takedown(struct nv04_surface_2d **pctx) +{ +	struct nv04_surface_2d *ctx; + +	if (!pctx || !*pctx) +		return; +	ctx = *pctx; +	*pctx = NULL; + +	nouveau_notifier_free(&ctx->ntfy); +	nouveau_grobj_free(&ctx->m2mf); +	nouveau_grobj_free(&ctx->surf2d); +	nouveau_grobj_free(&ctx->swzsurf); +	nouveau_grobj_free(&ctx->rect); +	nouveau_grobj_free(&ctx->blit); +	nouveau_grobj_free(&ctx->sifm); + +	FREE(ctx); +} + +struct nv04_surface_2d * +nv04_surface_2d_init(struct nouveau_winsys *nvws) +{ +	struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d); +	struct nouveau_channel *chan = nvws->channel; +	unsigned handle = 0x88000000, class; +	int ret; + +	if (!ctx) +		return NULL; + +	ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy); +	if (ret) { +		nv04_surface_2d_takedown(&ctx); +		return NULL; +	} + +	ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf); +	if (ret) { +		nv04_surface_2d_takedown(&ctx); +		return NULL; +	} + +	BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); +	OUT_RING  (chan, ctx->ntfy->handle); + +	if (chan->device->chipset < 0x10) +		class = NV04_CONTEXT_SURFACES_2D; +	else +		class = NV10_CONTEXT_SURFACES_2D; + +	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d); +	if (ret) { +		nv04_surface_2d_takedown(&ctx); +		return NULL; +	} + +	BEGIN_RING(chan, ctx->surf2d, +			 NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); +	OUT_RING  (chan, chan->vram->handle); +	OUT_RING  (chan, chan->vram->handle); + +	if (chan->device->chipset < 0x10) +		class = NV04_IMAGE_BLIT; +	else +		class = NV12_IMAGE_BLIT; + +	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit); +	if (ret) { +		nv04_surface_2d_takedown(&ctx); +		return NULL; +	} + +	BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); +	OUT_RING  (chan, ctx->ntfy->handle); +	BEGIN_RING(chan, 
ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1); +	OUT_RING  (chan, ctx->surf2d->handle); +	BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_OPERATION, 1); +	OUT_RING  (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY); + +	ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT, +				  &ctx->rect); +	if (ret) { +		nv04_surface_2d_takedown(&ctx); +		return NULL; +	} + +	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); +	OUT_RING  (chan, ctx->ntfy->handle); +	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); +	OUT_RING  (chan, ctx->surf2d->handle); +	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); +	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); +	BEGIN_RING(chan, ctx->rect, +			 NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); +	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + +	switch (chan->device->chipset & 0xf0) { +	case 0x00: +	case 0x10: +		class = NV04_SWIZZLED_SURFACE; +		break; +	case 0x20: +		class = NV20_SWIZZLED_SURFACE; +		break; +	case 0x30: +		class = NV30_SWIZZLED_SURFACE; +		break; +	case 0x40: +	case 0x60: +		class = NV40_SWIZZLED_SURFACE; +		break; +	default: +		/* Famous last words: this really can't happen.. */ +		assert(0); +		break; +	} + +	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf); +	if (ret) { +		nv04_surface_2d_takedown(&ctx); +		return NULL; +	} + +	if (chan->device->chipset < 0x10) { +		class = NV04_SCALED_IMAGE_FROM_MEMORY; +	} else +	if (chan->device->chipset < 0x40) { +		class = NV10_SCALED_IMAGE_FROM_MEMORY; +	} else { +		class = NV40_SCALED_IMAGE_FROM_MEMORY; +	} + +	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm); +	if (ret) { +		nv04_surface_2d_takedown(&ctx); +		return NULL; +	} + +	ctx->nvws = nvws; +	ctx->copy = nv04_surface_copy; +	ctx->fill = nv04_surface_fill; +	return ctx; +} diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h new file mode 100644 index 0000000000..21b8f86960 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_surface_2d.h @@ -0,0 +1,29 @@ +#ifndef __NV04_SURFACE_2D_H__ +#define __NV04_SURFACE_2D_H__ + +struct nv04_surface_2d { +	struct nouveau_winsys *nvws; +	struct nouveau_notifier *ntfy; +	struct nouveau_grobj *surf2d; +	struct nouveau_grobj *swzsurf; +	struct nouveau_grobj *m2mf; +	struct nouveau_grobj *rect; +	struct nouveau_grobj *blit; +	struct nouveau_grobj *sifm; + +	struct pipe_buffer *(*buf)(struct pipe_surface *); + +	void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst, +		     int dx, int dy, struct pipe_surface *src, int sx, int sy, +		     int w, int h); +	void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst, +		     int dx, int dy, int w, int h, unsigned value); +}; + +struct nv04_surface_2d * +nv04_surface_2d_init(struct nouveau_winsys *nvws); + +void +nv04_surface_2d_takedown(struct nv04_surface_2d **); + +#endif diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c new file mode 100644 index 0000000000..d21a0e34f7 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -0,0 +1,78 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv04_draw_elements( struct pipe_context *pipe, +                    struct pipe_buffer *indexBuffer, +                    unsigned indexSize, +                    unsigned prim, unsigned start, 
unsigned count) +{ +	struct nv04_context *nv04 = nv04_context( pipe ); +	struct draw_context *draw = nv04->draw; +	unsigned i; + +	nv04_emit_hw_state(nv04); + +	/* +	 * Map vertex buffers +	 */ +	for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { +		if (nv04->vtxbuf[i].buffer) { +			void *buf +				= pipe->winsys->buffer_map(pipe->winsys, +						nv04->vtxbuf[i].buffer, +						PIPE_BUFFER_USAGE_CPU_READ); +			draw_set_mapped_vertex_buffer(draw, i, buf); +		} +	} +	/* Map index buffer, if present */ +	if (indexBuffer) { +		void *mapped_indexes +			= pipe->winsys->buffer_map(pipe->winsys, indexBuffer, +					PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); +	} +	else { +		/* no index/element buffer */ +		draw_set_mapped_element_buffer(draw, 0, NULL); +	} + +	draw_set_mapped_constant_buffer(draw, +					nv04->constbuf[PIPE_SHADER_VERTEX], +					nv04->constbuf_nr[PIPE_SHADER_VERTEX]); + +	/* draw! */ +	draw_arrays(nv04->draw, prim, start, count); + +	/* +	 * unmap vertex/index buffers +	 */ +	for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { +		if (nv04->vtxbuf[i].buffer) { +			pipe->winsys->buffer_unmap(pipe->winsys, nv04->vtxbuf[i].buffer); +			draw_set_mapped_vertex_buffer(draw, i, NULL); +		} +	} +	if (indexBuffer) { +		pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); +		draw_set_mapped_element_buffer(draw, 0, NULL); +	} + +	return TRUE; +} + +boolean nv04_draw_arrays( struct pipe_context *pipe, +				 unsigned prim, unsigned start, unsigned count) +{ +	printf("coucou in draw arrays\n"); +	return nv04_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile new file mode 100644 index 0000000000..4ba7ce586d --- /dev/null +++ b/src/gallium/drivers/nv10/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv10 + +DRIVER_SOURCES = \ +	nv10_clear.c \ +	nv10_context.c \ +	nv10_fragprog.c \ +	nv10_fragtex.c \ +	nv10_miptree.c \ +	nv10_prim_vbuf.c \ +	nv10_screen.c \ +	nv10_state.c \ +	nv10_state_emit.c \ +	nv10_surface.c \ +	nv10_vbo.c + +C_SOURCES = \ +	$(COMMON_SOURCES) \ +	$(DRIVER_SOURCES) + +ASM_SOURCES =  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c new file mode 100644 index 0000000000..be7e09cf4b --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv10_context.h" + +void +nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +	pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c new file mode 100644 index 0000000000..ef2c0c5d9f --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -0,0 +1,296 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static void +nv10_flush(struct pipe_context *pipe, unsigned flags, +	   struct pipe_fence_handle **fence) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	draw_flush(nv10->draw); + +	FIRE_RING(fence); +} + +static void +nv10_destroy(struct pipe_context *pipe) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	if (nv10->draw) +		draw_destroy(nv10->draw); + +	FREE(nv10); +} + +static void nv10_init_hwctx(struct nv10_context *nv10) +{ +	struct nv10_screen *screen = nv10->screen; +	struct nouveau_winsys *nvws = screen->nvws; +	int i; +	float projectionmatrix[16]; + +	BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); +	OUT_RING  (screen->sync->handle); +	BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); +	OUT_RING  (nvws->channel->vram->handle); +	OUT_RING  (nvws->channel->gart->handle); +	BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); +	OUT_RING  (nvws->channel->vram->handle); +	OUT_RING  (nvws->channel->vram->handle); + +	BEGIN_RING(celsius, NV10TCL_NOP, 1); +	OUT_RING  (0); + +	BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); +	OUT_RING  (0); +	OUT_RING  (0); + +	BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); +	OUT_RING  ((0x7ff<<16)|0x800); +	BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); +	OUT_RING  ((0x7ff<<16)|0x800); + +	for (i=1;i<8;i++) { +		BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); +		OUT_RING  (0); +		BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); +		OUT_RING  (0); +	} + +	BEGIN_RING(celsius, 0x290, 1); +	OUT_RING  ((0x10<<16)|1); +	BEGIN_RING(celsius, 0x3f4, 1); +	OUT_RING  (0); + +	BEGIN_RING(celsius, NV10TCL_NOP, 1); +	OUT_RING  (0); + +	if (nv10->screen->celsius->grclass != NV10TCL) { +		/* For nv11, nv17 */ +		BEGIN_RING(celsius, 0x120, 3); +		OUT_RING  (0); +		OUT_RING  (1); +		OUT_RING  (2); + +		BEGIN_RING(celsius, NV10TCL_NOP, 1); +		OUT_RING  (0); +	} + +	BEGIN_RING(celsius, NV10TCL_NOP, 1); +	OUT_RING  (0); + +	/* Set state */ +	BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); +	OUT_RING  (0x207); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); +	OUT_RING  (0); +	OUT_RING  (0); + +	BEGIN_RING(celsius, 
NV10TCL_RC_IN_ALPHA(0), 12); +	OUT_RING  (0x30141010); +	OUT_RING  (0); +	OUT_RING  (0x20040000); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0x00000c00); +	OUT_RING  (0); +	OUT_RING  (0x00000c00); +	OUT_RING  (0x18000000); +	OUT_RING  (0x300e0300); +	OUT_RING  (0x0c091c80); + +	BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); +	OUT_RING  (1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); +	OUT_RING  (1); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0x8006); +	BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); +	OUT_RING  (0xff); +	OUT_RING  (0x207); +	OUT_RING  (0); +	OUT_RING  (0xff); +	OUT_RING  (0x1e00); +	OUT_RING  (0x1e00); +	OUT_RING  (0x1e00); +	OUT_RING  (0x1d01); +	BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); +	OUT_RING  (0x201); +	BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); +	OUT_RING  (8); +	BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); +	OUT_RING  (8); +	BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); +	OUT_RING  (0x1b02); +	OUT_RING  (0x1b02); +	BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); +	OUT_RING  (0x405); +	OUT_RING  (0x901); +	BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); +	for (i=0;i<8;i++) { +		OUT_RING  (0); +	} +	BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); +	OUT_RING  (0x3fc00000);	/* -1.50 */ +	OUT_RING  (0xbdb8aa0a);	/* -0.09 */ +	OUT_RING  (0);		/*  0.00 */ + +	BEGIN_RING(celsius, NV10TCL_NOP, 1); +	OUT_RING  (0); + +	BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); +	OUT_RING  (0x802); +	OUT_RING  (2); +	/* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when +	 * using texturing, except when using the texture matrix +	 */ +	BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); +	OUT_RING  (6); +	BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); +	OUT_RING  (0x01010101); + +	/* Set vertex component */ +	BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); +	OUT_RINGf (1.0); +	OUT_RINGf (1.0); +	OUT_RINGf (1.0); +	OUT_RINGf (1.0); +	BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RINGf (1.0); +	BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); +	OUT_RINGf (0.0); +	OUT_RINGf (0.0); +	OUT_RINGf (0.0); +	OUT_RINGf (1.0); +	BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); +	
OUT_RINGf (0.0); +	OUT_RINGf (0.0); +	OUT_RINGf (0.0); +	OUT_RINGf (1.0); +	BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); +	OUT_RINGf (0.0); +	BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); +	OUT_RING  (1); + +	memset(projectionmatrix, 0, sizeof(projectionmatrix)); +	BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); +	projectionmatrix[0*4+0] = 1.0; +	projectionmatrix[1*4+1] = 1.0; +	projectionmatrix[2*4+2] = 1.0; +	projectionmatrix[3*4+3] = 1.0; +	for (i=0;i<16;i++) { +		OUT_RINGf  (projectionmatrix[i]); +	} + +	BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); +	OUT_RING  (0.0); +	OUT_RINGf  (16777216.0); + +	BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); +	OUT_RINGf  (-2048.0); +	OUT_RINGf  (-2048.0); +	OUT_RINGf  (16777215.0 * 0.5); +	OUT_RING  (0); + +	FIRE_RING (NULL); +} + +static void +nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ +	struct nv10_screen *screen = nv10_screen(pscreen); +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv10_context *nv10; +	struct nouveau_winsys *nvws = screen->nvws; + +	nv10 = CALLOC(1, sizeof(struct nv10_context)); +	if (!nv10) +		return NULL; +	nv10->screen = screen; +	nv10->pctx_id = pctx_id; + +	nv10->nvws = nvws; + +	nv10->pipe.winsys = ws; +	nv10->pipe.screen = pscreen; +	nv10->pipe.destroy = nv10_destroy; +	nv10->pipe.set_edgeflags = nv10_set_edgeflags; +	nv10->pipe.draw_arrays = nv10_draw_arrays; +	nv10->pipe.draw_elements = nv10_draw_elements; +	nv10->pipe.clear = nv10_clear; +	nv10->pipe.flush = nv10_flush; + +	nv10_init_surface_functions(nv10); +	nv10_init_state_functions(nv10); + +	nv10->draw = draw_create(); +	assert(nv10->draw); +	draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10)); + +	nv10_init_hwctx(nv10); + +	return &nv10->pipe; +} + diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h new file mode 100644 index 0000000000..f3b56de25a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -0,0 +1,153 @@ +#ifndef __NV10_CONTEXT_H__ +#define __NV10_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \ +	struct nv10_screen *ctx = nv10->screen +#include "nouveau/nouveau_push.h" + +#include "nv10_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ +	fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) 
\ +	fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV10_NEW_VERTPROG	(1 << 0) +#define NV10_NEW_FRAGPROG	(1 << 1) +#define NV10_NEW_VTXARRAYS	(1 << 2) +#define NV10_NEW_BLEND		(1 << 3) +#define NV10_NEW_BLENDCOL	(1 << 4) +#define NV10_NEW_RAST 		(1 << 5) +#define NV10_NEW_DSA  		(1 << 6) +#define NV10_NEW_VIEWPORT	(1 << 7) +#define NV10_NEW_SCISSOR	(1 << 8) +#define NV10_NEW_FRAMEBUFFER	(1 << 9) + +#include "nv10_screen.h" + +struct nv10_context { +	struct pipe_context pipe; + +	struct nouveau_winsys *nvws; +	struct nv10_screen *screen; +	unsigned pctx_id; + +	struct draw_context *draw; + +	uint32_t dirty; + +	struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; +	struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; +	unsigned dirty_samplers; +	unsigned fp_samplers; +	unsigned vp_samplers; + +	uint32_t rt_enable; +	struct pipe_buffer *rt[4]; +	struct pipe_buffer *zeta; +	uint32_t lma_offset; + +	struct nv10_blend_state *blend; +	struct pipe_blend_color *blend_color; +	struct nv10_rasterizer_state *rast; +	struct nv10_depth_stencil_alpha_state *dsa; +	struct pipe_viewport_state *viewport; +	struct pipe_scissor_state *scissor; +	struct pipe_framebuffer_state *framebuffer; + +	//struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; +	float *constbuf[PIPE_SHADER_TYPES][32][4]; +	unsigned constbuf_nr[PIPE_SHADER_TYPES]; + +	struct vertex_info vertex_info; + +	struct { +		struct pipe_buffer *buffer; +		uint32_t format; +	} tex[2]; + +	unsigned vb_enable; +	struct { +		struct pipe_buffer *buffer; +		unsigned delta; +	} vb[16]; + +/*	struct { +	 +		struct nouveau_resource *exec_heap; +		struct nouveau_resource *data_heap; + +		struct nv10_vertex_program *active; + +		struct nv10_vertex_program *current; +	} vertprog; +*/ +	struct { +		struct nv10_fragment_program *active; + +		struct nv10_fragment_program *current; +		struct pipe_buffer *constant_buf; +	} fragprog; + +	struct pipe_vertex_buffer  vtxbuf[PIPE_MAX_ATTRIBS]; +	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +}; + +static INLINE struct nv10_context * +nv10_context(struct pipe_context *pipe) +{ +	return (struct nv10_context *)pipe; +} + +extern void nv10_init_state_functions(struct nv10_context *nv10); +extern void nv10_init_surface_functions(struct nv10_context *nv10); + +extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv10_clear.c */ +extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		       unsigned clearValue); + +/* nv10_draw.c */ +extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10); + +/* nv10_fragprog.c */ +extern void nv10_fragprog_bind(struct nv10_context *, +			       struct nv10_fragment_program *); +extern void nv10_fragprog_destroy(struct nv10_context *, +				  struct nv10_fragment_program *); + +/* nv10_fragtex.c */ +extern void nv10_fragtex_bind(struct nv10_context *); + +/* nv10_prim_vbuf.c */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ); +extern void nv10_vtxbuf_bind(struct nv10_context* nv10); + +/* nv10_state.c and friends */ +extern void nv10_emit_hw_state(struct nv10_context *nv10); +extern void nv10_state_tex_update(struct nv10_context *nv10); + +/* nv10_vbo.c */ +extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, +				unsigned start, unsigned count); +extern boolean nv10_draw_elements( struct pipe_context *pipe, +                    struct pipe_buffer *indexBuffer, +                    unsigned indexSize, +                    unsigned prim, unsigned start, unsigned count); + 
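+/* Note on the draw path: nv10_draw_arrays()/nv10_draw_elements() (nv10_vbo.c)
+ * currently run all geometry through the software draw module; the vbuf
+ * stage in nv10_prim_vbuf.c then emits the actual hardware commands. */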
+ +#endif diff --git a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c new file mode 100644 index 0000000000..698db5a16a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv10_context.h" + +void +nv10_fragprog_bind(struct nv10_context *nv10, struct nv10_fragment_program *fp) +{ +} + +void +nv10_fragprog_destroy(struct nv10_context *nv10, +		      struct nv10_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c new file mode 100644 index 0000000000..27f2f87584 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -0,0 +1,124 @@ +#include "nv10_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf)                                                                \ +{                                                                              \ +  TRUE,                                                                        \ +  PIPE_FORMAT_##m,                                                             \ +  NV10TCL_TX_FORMAT_FORMAT_##tf,                                               \ +} + +struct nv10_texture_format { +	boolean defined; +	uint	pipe; +	int     format; +}; + +static struct nv10_texture_format +nv10_texture_formats[] = { +	_(A8R8G8B8_UNORM, A8R8G8B8), +	_(A1R5G5B5_UNORM, A1R5G5B5), +	_(A4R4G4B4_UNORM, A4R4G4B4), +	_(L8_UNORM      , L8      ), +	_(A8_UNORM      , A8      ), +	_(A8L8_UNORM    , A8L8    ), +//	_(RGB_DXT1      , DXT1,   ), +//	_(RGBA_DXT1     , DXT1,   ), +//	_(RGBA_DXT3     , DXT3,   ), +//	_(RGBA_DXT5     , DXT5,   ), +	{}, +}; + +static struct nv10_texture_format * +nv10_fragtex_format(uint pipe_format) +{ +	struct nv10_texture_format *tf = nv10_texture_formats; + +	while (tf->defined) { +		if (tf->pipe == pipe_format) +			return tf; +		tf++; +	} + +	return NULL; +} + + +static void +nv10_fragtex_build(struct nv10_context *nv10, int unit) +{ +#if 0 +	struct nv10_sampler_state *ps = nv10->tex_sampler[unit]; +	struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; +	struct pipe_texture *pt = &nv10mt->base; +	struct nv10_texture_format *tf; +	uint32_t txf, txs, txp; + +	tf = nv10_fragtex_format(pt->format); +	if (!tf || !tf->defined) { +		NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); +		return; +	} + +	txf  = tf->format << 8; +	txf |= (pt->last_level + 1) << 16; +	txf |= log2i(pt->width[0]) << 20; +	txf |= log2i(pt->height[0]) << 24; +	txf |= log2i(pt->depth[0]) << 28; +	txf |= 8; + +	switch (pt->target) { +	case PIPE_TEXTURE_CUBE: +		txf |= NV10TCL_TX_FORMAT_CUBE_MAP; +		/* fall-through */ +	case PIPE_TEXTURE_2D: +		txf |= (2<<4); +		break; +	case PIPE_TEXTURE_1D: +		txf |= (1<<4); +		break; +	default: +		NOUVEAU_ERR("Unknown target %d\n", pt->target); +		return; +	} + +	BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); +	OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); +	OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); +	OUT_RING  (ps->wrap); +	OUT_RING  (0x40000000); /* enable */ +	OUT_RING  (txs); +	OUT_RING  (ps->filt | 0x2000 /* magic */); +	OUT_RING  ((pt->width[0] << 16) | pt->height[0]); +	OUT_RING  (ps->bcol); +#endif +} + +void +nv10_fragtex_bind(struct nv10_context *nv10) +{ +#if 0 +	struct 
nv10_fragment_program *fp = nv10->fragprog.active; +	unsigned samplers, unit; + +	samplers = nv10->fp_samplers & ~fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); +		OUT_RING  (0); +	} + +	samplers = nv10->dirty_samplers & fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		nv10_fragtex_build(nv10, unit); +	} + +	nv10->fp_samplers = fp->samplers; +#endif +} + diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c new file mode 100644 index 0000000000..9616135461 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -0,0 +1,174 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static void +nv10_miptree_layout(struct nv10_miptree *nv10mt) +{ +	struct pipe_texture *pt = &nv10mt->base; +	boolean swizzled = FALSE; +	uint width = pt->width[0], height = pt->height[0]; +	uint offset = 0; +	int nr_faces, l, f; + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		nr_faces = 6; +	} else { +		nr_faces = 1; +	} +	 +	for (l = 0; l <= pt->last_level; l++) { +		pt->width[l] = width; +		pt->height[l] = height; +		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); +		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + +		if (swizzled) +			nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; +		else +			nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; +		nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; + +		nv10mt->level[l].image_offset = +			CALLOC(nr_faces, sizeof(unsigned)); + +		width  = MAX2(1, width  >> 1); +		height = MAX2(1, height >> 1); + +	} + +	for (f = 0; f < nr_faces; f++) { +		for (l = 0; l <= pt->last_level; l++) { +			nv10mt->level[l].image_offset[f] = offset; +			offset += nv10mt->level[l].pitch * pt->height[l]; +		} +	} + +	nv10mt->total_size = offset; +} + +static struct pipe_texture * +nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +		     const unsigned *stride, struct pipe_buffer *pb) +{ +	struct nv10_miptree *mt; + +	/* Only supports 2D, non-mipmapped textures for the moment */ +	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || +	    pt->depth[0] != 1) +		return NULL; + +	mt = CALLOC_STRUCT(nv10_miptree); +	if (!mt) +		return NULL; + +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->level[0].pitch = stride[0]; +	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + +	pipe_buffer_reference(pscreen, &mt->buffer, pb); +	return &mt->base; +} + +static struct pipe_texture * +nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ +	struct pipe_winsys *ws = screen->winsys; +	struct nv10_miptree *mt; + +	mt = MALLOC(sizeof(struct nv10_miptree)); +	if (!mt) +		return NULL; +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = screen; + +	nv10_miptree_layout(mt); + +	mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, +					   mt->total_size); +	if (!mt->buffer) { +		FREE(mt); +		return NULL; +	} +	 +	return &mt->base; +} + +static void +nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ +	struct pipe_texture *mt = *pt; + +	*pt = NULL; +	if (--mt->refcount <= 0) { +		struct nv10_miptree *nv10mt = (struct nv10_miptree *)mt; +		int l; + +		pipe_buffer_reference(screen, &nv10mt->buffer, NULL); +		for (l = 0; l <= mt->last_level; l++) 
{ +			if (nv10mt->level[l].image_offset) +				FREE(nv10mt->level[l].image_offset); +		} +		FREE(nv10mt); +	} +} + +static void +nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, +		    uint face, uint levels) +{ +} + + +static struct pipe_surface * +nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, +			 unsigned face, unsigned level, unsigned zslice, +			 unsigned flags) +{ +	struct pipe_winsys *ws = screen->winsys; +	struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; +	struct pipe_surface *ps; + +	ps = CALLOC_STRUCT(pipe_surface); +	if (!ps) +		return NULL; +	pipe_texture_reference(&ps->texture, pt); +	ps->format = pt->format; +	ps->width = pt->width[level]; +	ps->height = pt->height[level]; +	ps->block = pt->block; +	ps->nblocksx = pt->nblocksx[level]; +	ps->nblocksy = pt->nblocksy[level]; +	ps->stride = nv10mt->level[level].pitch; +	ps->refcount = 1; + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		ps->offset = nv10mt->level[level].image_offset[face]; +	} else { +		ps->offset = nv10mt->level[level].image_offset[0]; +	} + +	return ps; +} + +static void +nv10_miptree_surface_release(struct pipe_screen *screen, +			     struct pipe_surface **surface) +{ +} + +void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ +	pscreen->texture_create = nv10_miptree_create; +	pscreen->texture_blanket = nv10_miptree_blanket; +	pscreen->texture_release = nv10_miptree_release; +	pscreen->get_tex_surface = nv10_miptree_surface_get; +	pscreen->tex_surface_release = nv10_miptree_surface_release; +} + diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c new file mode 100644 index 0000000000..7435d87315 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -0,0 +1,245 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. 
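+ * Here the accumulated vertices land in a PIPE_BUFFER_USAGE_VERTEX buffer
+ * and the index lists are emitted with NV10TCL_VB_ELEMENT_U16/U32 in
+ * nv10_vbuf_render_draw() below.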
+ * + * XXX: work in progress  + *  + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +#include "draw/draw_vbuf.h" + +/** + * Primitive renderer for nv10. + */ +struct nv10_vbuf_render { +	struct vbuf_render base; + +	struct nv10_context *nv10;    + +	/** Vertex buffer */ +	struct pipe_buffer* buffer; + +	/** Vertex size in bytes */ +	unsigned vertex_size; + +	/** Hardware primitive */ +	unsigned hwprim; +}; + + +void nv10_vtxbuf_bind( struct nv10_context* nv10 ) +{ +	int i; +	for(i = 0; i < 8; i++) { +		BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1); +		OUT_RING(0/*nv10->vtxbuf*/); +		BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1); +		OUT_RING(0/*XXX*/); +	} +} + +/** + * Basically a cast wrapper. + */ +static INLINE struct nv10_vbuf_render * +nv10_vbuf_render( struct vbuf_render *render ) +{ +	assert(render); +	return (struct nv10_vbuf_render *)render; +} + + +static const struct vertex_info * +nv10_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ +	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); +	struct nv10_context *nv10 = nv10_render->nv10; + +	nv10_emit_hw_state(nv10); + +	return &nv10->vertex_info; +} + + +static void * +nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, +		ushort vertex_size, +		ushort nr_vertices ) +{ +	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); +	struct nv10_context *nv10 = nv10_render->nv10; +	struct pipe_winsys *winsys = nv10->pipe.winsys; +	size_t size = (size_t)vertex_size * (size_t)nr_vertices; + +	assert(!nv10_render->buffer); +	nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); + +	nv10->dirty |= NV10_NEW_VTXARRAYS; + +	return winsys->buffer_map(winsys,  +			nv10_render->buffer,  +			PIPE_BUFFER_USAGE_CPU_WRITE); +} + + +static boolean +nv10_vbuf_render_set_primitive( struct vbuf_render *render,  +		unsigned prim ) +{ +	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); +	unsigned hwp = nvgl_primitive(prim); +	if (hwp == 0) +		return FALSE; + +	nv10_render->hwprim = hwp; +	return TRUE; +} + + +static void  +nv10_vbuf_render_draw( struct vbuf_render *render, +		const ushort *indices, +		uint nr_indices) +{ +	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); +	struct nv10_context *nv10 = nv10_render->nv10; +	int push, i; + +	nv10_emit_hw_state(nv10); + +	BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); +	OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + +	BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); +	OUT_RING(nv10_render->hwprim); + +	if (nr_indices & 1) { +		BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); +		OUT_RING  (indices[0]); +		indices++; nr_indices--; +	} + +	while (nr_indices) { +		// XXX too big/small ? 
check the size +		push = MIN2(nr_indices, 1200 * 2); + +		BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); +		for (i = 0; i < push; i+=2) +			OUT_RING((indices[i+1] << 16) | indices[i]); + +		nr_indices -= push; +		indices  += push; +	} + +	BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); +	OUT_RING  (0); +} + + +static void +nv10_vbuf_render_release_vertices( struct vbuf_render *render, +		void *vertices,  +		unsigned vertex_size, +		unsigned vertices_used ) +{ +	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); +	struct nv10_context *nv10 = nv10_render->nv10; +	struct pipe_winsys *winsys = nv10->pipe.winsys; +	struct pipe_screen *pscreen = &nv10->screen->pipe; + +	assert(nv10_render->buffer); +	winsys->buffer_unmap(winsys, nv10_render->buffer); +	pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL); +} + + +static void +nv10_vbuf_render_destroy( struct vbuf_render *render ) +{ +	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); +	FREE(nv10_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +nv10_vbuf_render_create( struct nv10_context *nv10 ) +{ +	struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render); + +	nv10_render->nv10 = nv10; + +	nv10_render->base.max_vertex_buffer_bytes = 16*1024; +	nv10_render->base.max_indices = 1024; +	nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info; +	nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices; +	nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive; +	nv10_render->base.draw = nv10_vbuf_render_draw; +	nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices; +	nv10_render->base.destroy = nv10_vbuf_render_destroy; + +	return &nv10_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. 
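+ * Wraps the nv10 vbuf renderer above in a draw-module vbuf stage
+ * (draw_vbuf_stage()), so post-transform vertices reach the hardware.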
+ */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ) +{ +	struct vbuf_render *render; +	struct draw_stage *stage; + +	render = nv10_vbuf_render_create(nv10); +	if(!render) +		return NULL; + +	stage = draw_vbuf_stage( nv10->draw, render ); +	if(!stage) { +		render->destroy(render); +		return NULL; +	} + +	return stage; +} diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c new file mode 100644 index 0000000000..f417b06c94 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -0,0 +1,226 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static const char * +nv10_screen_get_name(struct pipe_screen *screen) +{ +	struct nv10_screen *nv10screen = nv10_screen(screen); +	struct nouveau_device *dev = nv10screen->nvws->channel->device; +	static char buffer[128]; + +	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); +	return buffer; +} + +static const char * +nv10_screen_get_vendor(struct pipe_screen *screen) +{ +	return "nouveau"; +} + +static int +nv10_screen_get_param(struct pipe_screen *screen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +		return 2; +	case PIPE_CAP_NPOT_TEXTURES: +		return 0; +	case PIPE_CAP_TWO_SIDED_STENCIL: +		return 0; +	case PIPE_CAP_GLSL: +		return 0; +	case PIPE_CAP_S3TC: +		return 0; +	case PIPE_CAP_ANISOTROPIC_FILTER: +		return 1; +	case PIPE_CAP_POINT_SPRITE: +		return 0; +	case PIPE_CAP_MAX_RENDER_TARGETS: +		return 1; +	case PIPE_CAP_OCCLUSION_QUERY: +		return 0; +	case PIPE_CAP_TEXTURE_SHADOW_MAP: +		return 0; +	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +		return 12; +	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +		return 0; +	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +		return 12; +	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +		return 0; +	case NOUVEAU_CAP_HW_VTXBUF: +	case NOUVEAU_CAP_HW_IDXBUF: +		return 0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0; +	} +} + +static float +nv10_screen_get_paramf(struct pipe_screen *screen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_LINE_WIDTH: +	case PIPE_CAP_MAX_LINE_WIDTH_AA: +		return 10.0; +	case PIPE_CAP_MAX_POINT_WIDTH: +	case PIPE_CAP_MAX_POINT_WIDTH_AA: +		return 64.0; +	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +		return 2.0; +	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +		return 4.0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0.0; +	} +} + +static boolean +nv10_screen_is_format_supported(struct pipe_screen *screen, +				enum pipe_format format, +				enum pipe_texture_target target, +				unsigned tex_usage, unsigned geom_flags) +{ +	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +			return TRUE; +		default: +			break; +		} +	} else { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_A1R5G5B5_UNORM: +		case PIPE_FORMAT_A4R4G4B4_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_L8_UNORM: +		case PIPE_FORMAT_A8_UNORM: +		case PIPE_FORMAT_I8_UNORM: +			return TRUE; +		default: +			break; +		} +	} + +	return FALSE; +} + +static void * +nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, +		 unsigned flags ) +{ +	struct pipe_winsys *ws = screen->winsys; +	void *map; +        struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; + +	map = ws->buffer_map(ws, nv10mt->buffer, 
flags); +	if (!map) +		return NULL; + +	return map + surface->offset; +} + +static void +nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ +	struct pipe_winsys *ws = screen->winsys; +        struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; + +	ws->buffer_unmap(ws, nv10mt->buffer); +} + +static void +nv10_screen_destroy(struct pipe_screen *pscreen) +{ +	struct nv10_screen *screen = nv10_screen(pscreen); +	struct nouveau_winsys *nvws = screen->nvws; + +	nvws->notifier_free(&screen->sync); +	nvws->grobj_free(&screen->celsius); + +	FREE(pscreen); +} + +static struct pipe_buffer * +nv10_surface_buffer(struct pipe_surface *surf) +{ +	struct nv10_miptree *mt = (struct nv10_miptree *)surf->texture; + +	return mt->buffer; +} + +struct pipe_screen * +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ +	struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen); +	unsigned celsius_class; +	unsigned chipset = nvws->channel->device->chipset; +	int ret; + +	if (!screen) +		return NULL; +	screen->nvws = nvws; + +	/* 2D engine setup */ +	screen->eng2d = nv04_surface_2d_init(nvws); +	screen->eng2d->buf = nv10_surface_buffer; + +	/* 3D object */ +	if (chipset>=0x20) +		celsius_class=NV11TCL; +	else if (chipset>=0x17) +		celsius_class=NV17TCL; +	else if (chipset>=0x11) +		celsius_class=NV11TCL; +	else +		celsius_class=NV10TCL; + +	if (!celsius_class) { +		NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", chipset); +		return NULL; +	} + +	ret = nvws->grobj_alloc(nvws, celsius_class, &screen->celsius); +	if (ret) { +		NOUVEAU_ERR("Error creating 3D object: %d\n", ret); +		return FALSE; +	} + +	/* Notifier for sync purposes */ +	ret = nvws->notifier_alloc(nvws, 1, &screen->sync); +	if (ret) { +		NOUVEAU_ERR("Error creating notifier object: %d\n", ret); +		nv10_screen_destroy(&screen->pipe); +		return NULL; +	} + +	screen->pipe.winsys = ws; +	screen->pipe.destroy = nv10_screen_destroy; + +	screen->pipe.get_name = nv10_screen_get_name; +	screen->pipe.get_vendor = nv10_screen_get_vendor; +	screen->pipe.get_param = nv10_screen_get_param; +	screen->pipe.get_paramf = nv10_screen_get_paramf; + +	screen->pipe.is_format_supported = nv10_screen_is_format_supported; + +	screen->pipe.surface_map = nv10_surface_map; +	screen->pipe.surface_unmap = nv10_surface_unmap; + +	nv10_screen_init_miptree_functions(&screen->pipe); +	u_simple_screen_init(&screen->pipe); + +	return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h new file mode 100644 index 0000000000..60102a369a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.h @@ -0,0 +1,24 @@ +#ifndef __NV10_SCREEN_H__ +#define __NV10_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv10_screen { +	struct pipe_screen pipe; + +	struct nouveau_winsys *nvws; + +	/* HW graphics objects */ +	struct nv04_surface_2d *eng2d; +	struct nouveau_grobj *celsius; +	struct nouveau_notifier *sync; +}; + +static INLINE struct nv10_screen * +nv10_screen(struct pipe_screen *screen) +{ +	return (struct nv10_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c new file mode 100644 index 0000000000..119af66dfd --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -0,0 +1,589 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include 
"nv10_context.h" +#include "nv10_state.h" + +static void * +nv10_blend_state_create(struct pipe_context *pipe, +			const struct pipe_blend_state *cso) +{ +	struct nv10_blend_state *cb; + +	cb = MALLOC(sizeof(struct nv10_blend_state)); + +	cb->b_enable = cso->blend_enable ? 1 : 0; +	cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | +			 (nvgl_blend_func(cso->rgb_src_factor))); +	cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | +			 (nvgl_blend_func(cso->rgb_dst_factor))); + +	cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | +		      ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | +		      ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | +		      ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + +	cb->d_enable = cso->dither ? 1 : 0; + +	return (void *)cb; +} + +static void +nv10_blend_state_bind(struct pipe_context *pipe, void *blend) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	nv10->blend = (struct nv10_blend_state*)blend; + +	nv10->dirty |= NV10_NEW_BLEND; +} + +static void +nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { +	unsigned ret; + +	switch (wrap) { +	case PIPE_TEX_WRAP_REPEAT: +		ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; +		break; +	case PIPE_TEX_WRAP_MIRROR_REPEAT: +		ret = NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +		ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +		ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_CLAMP: +		ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +	case PIPE_TEX_WRAP_MIRROR_CLAMP: +	default: +		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +		ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; +		break; +	} + +	return ret >> NV10TCL_TX_FORMAT_WRAP_S_SHIFT; +} + +static void * +nv10_sampler_state_create(struct pipe_context *pipe, +			  const struct pipe_sampler_state *cso) +{ +	struct nv10_sampler_state *ps; +	uint32_t filter = 0; + +	ps = MALLOC(sizeof(struct nv10_sampler_state)); + +	ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) | +		    (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT)); + +	ps->en = 0; +	if (cso->max_anisotropy > 1.0) { +		/* no idea, binary driver sets it, works without it.. meh.. 
*/ +		ps->wrap |= (1 << 5); + +/*		if (cso->max_anisotropy >= 16.0) { +			ps->en |= NV10TCL_TX_ENABLE_ANISO_16X; +		} else +		if (cso->max_anisotropy >= 12.0) { +			ps->en |= NV10TCL_TX_ENABLE_ANISO_12X; +		} else +		if (cso->max_anisotropy >= 10.0) { +			ps->en |= NV10TCL_TX_ENABLE_ANISO_10X; +		} else +		if (cso->max_anisotropy >= 8.0) { +			ps->en |= NV10TCL_TX_ENABLE_ANISO_8X; +		} else +		if (cso->max_anisotropy >= 6.0) { +			ps->en |= NV10TCL_TX_ENABLE_ANISO_6X; +		} else +		if (cso->max_anisotropy >= 4.0) { +			ps->en |= NV10TCL_TX_ENABLE_ANISO_4X; +		} else { +			ps->en |= NV10TCL_TX_ENABLE_ANISO_2X; +		}*/ +	} + +	switch (cso->mag_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		filter |= NV10TCL_TX_FILTER_MAGNIFY_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		filter |= NV10TCL_TX_FILTER_MAGNIFY_NEAREST; +		break; +	} + +	switch (cso->min_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; +			break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR; +			break; +		} +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; +		break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST; +			break; +		} +		break; +	} + +	ps->filt = filter; + +/*	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +		switch (cso->compare_func) { +		case PIPE_FUNC_NEVER: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER; +			break; +		case PIPE_FUNC_GREATER: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER; +			break; +		case PIPE_FUNC_EQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL; +			break; +		case PIPE_FUNC_GEQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL; +			break; +		case PIPE_FUNC_LESS: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS; +			break; +		case PIPE_FUNC_NOTEQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL; +			break; +		case PIPE_FUNC_LEQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL; +			break; +		case PIPE_FUNC_ALWAYS: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS; +			break; +		default: +			break; +		} +	}*/ + +	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | +		    (float_to_ubyte(cso->border_color[0]) << 16) | +		    (float_to_ubyte(cso->border_color[1]) <<  8) | +		    (float_to_ubyte(cso->border_color[2]) <<  0)); + +	return (void *)ps; +} + +static void +nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ +	struct nv10_context *nv10 = nv10_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv10->tex_sampler[unit] = sampler[unit]; +		nv10->dirty_samplers |= (1 << unit); +	} +} + +static void +nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void +nv10_set_sampler_texture(struct pipe_context *pipe, unsigned nr, +			 struct pipe_texture **miptree) +{ +	struct nv10_context *nv10 = nv10_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv10->tex_miptree[unit] = (struct nv10_miptree *)miptree[unit]; +		nv10->dirty_samplers |= (1 << unit); +	} +} + +static void * 
+nv10_rasterizer_state_create(struct pipe_context *pipe, +			     const struct pipe_rasterizer_state *cso) +{ +	struct nv10_rasterizer_state *rs; +	int i; + +	/*XXX: ignored: +	 * 	light_twoside +	 * 	offset_cw/ccw -nohw +	 * 	scissor +	 * 	point_smooth -nohw +	 * 	multisample +	 * 	offset_units / offset_scale +	 */ +	rs = MALLOC(sizeof(struct nv10_rasterizer_state)); + +	rs->templ = cso; +	 +	rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; + +	rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; +	rs->line_smooth_en = cso->line_smooth ? 1 : 0; + +	rs->point_size = *(uint32_t*)&cso->point_size; + +	rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + +	if (cso->front_winding == PIPE_WINDING_CCW) { +		rs->front_face = NV10TCL_FRONT_FACE_CCW; +		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); +		rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_cw); +	} else { +		rs->front_face = NV10TCL_FRONT_FACE_CW; +		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); +		rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_ccw); +	} + +	switch (cso->cull_mode) { +	case PIPE_WINDING_CCW: +		rs->cull_face_en = 1; +		if (cso->front_winding == PIPE_WINDING_CCW) +			rs->cull_face    = NV10TCL_CULL_FACE_FRONT; +		else +			rs->cull_face    = NV10TCL_CULL_FACE_BACK; +		break; +	case PIPE_WINDING_CW: +		rs->cull_face_en = 1; +		if (cso->front_winding == PIPE_WINDING_CW) +			rs->cull_face    = NV10TCL_CULL_FACE_FRONT; +		else +			rs->cull_face    = NV10TCL_CULL_FACE_BACK; +		break; +	case PIPE_WINDING_BOTH: +		rs->cull_face_en = 1; +		rs->cull_face    = NV10TCL_CULL_FACE_FRONT_AND_BACK; +		break; +	case PIPE_WINDING_NONE: +	default: +		rs->cull_face_en = 0; +		rs->cull_face    = 0; +		break; +	} + +	if (cso->point_sprite) { +		rs->point_sprite = (1 << 0); +		for (i = 0; i < 8; i++) { +			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) +				rs->point_sprite |= (1 << (8 + i)); +		} +	} else { +		rs->point_sprite = 0; +	} + +	return (void *)rs; +} + +static void +nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	nv10->rast = (struct nv10_rasterizer_state*)rast; + +	draw_set_rasterizer_state(nv10->draw, (nv10->rast ? nv10->rast->templ : NULL)); + +	nv10->dirty |= NV10_NEW_RAST; +} + +static void +nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void * +nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe, +			const struct pipe_depth_stencil_alpha_state *cso) +{ +	struct nv10_depth_stencil_alpha_state *hw; + +	hw = MALLOC(sizeof(struct nv10_depth_stencil_alpha_state)); + +	hw->depth.func		= nvgl_comparison_op(cso->depth.func); +	hw->depth.write_enable	= cso->depth.writemask ? 1 : 0; +	hw->depth.test_enable	= cso->depth.enabled ? 1 : 0; + +	hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0; +	hw->stencil.wmask = cso->stencil[0].writemask; +	hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func); +	hw->stencil.ref	= cso->stencil[0].ref_value; +	hw->stencil.vmask = cso->stencil[0].valuemask; +	hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op); +	hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op); +	hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op); + +	hw->alpha.enabled = cso->alpha.enabled ? 
1 : 0; +	hw->alpha.func = nvgl_comparison_op(cso->alpha.func); +	hw->alpha.ref  = float_to_ubyte(cso->alpha.ref_value); + +	return (void *)hw; +} + +static void +nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	nv10->dsa = (struct nv10_depth_stencil_alpha_state*)dsa; + +	nv10->dirty |= NV10_NEW_DSA; +} + +static void +nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void * +nv10_vp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *templ) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	return draw_create_vertex_shader(nv10->draw, templ); +} + +static void +nv10_vp_state_bind(struct pipe_context *pipe, void *shader) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	draw_bind_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); + +	nv10->dirty |= NV10_NEW_VERTPROG; +} + +static void +nv10_vp_state_delete(struct pipe_context *pipe, void *shader) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	draw_delete_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv10_fp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv10_fragment_program *fp; + +	fp = CALLOC(1, sizeof(struct nv10_fragment_program)); +	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); +	 +	tgsi_scan_shader(cso->tokens, &fp->info); + +	return (void *)fp; +} + +static void +nv10_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv10_context *nv10 = nv10_context(pipe); +	struct nv10_fragment_program *fp = hwcso; + +	nv10->fragprog.current = fp; +	nv10->dirty |= NV10_NEW_FRAGPROG; +} + +static void +nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv10_context *nv10 = nv10_context(pipe); +	struct nv10_fragment_program *fp = hwcso; + +	nv10_fragprog_destroy(nv10, fp); +	FREE((void*)fp->pipe.tokens); +	FREE(fp); +} + +static void +nv10_set_blend_color(struct pipe_context *pipe, +		     const struct pipe_blend_color *bcol) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	nv10->blend_color = (struct pipe_blend_color*)bcol; + +	nv10->dirty |= NV10_NEW_BLENDCOL; +} + +static void +nv10_set_clip_state(struct pipe_context *pipe, +		    const struct pipe_clip_state *clip) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	draw_set_clip_state(nv10->draw, clip); +} + +static void +nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +			 const struct pipe_constant_buffer *buf ) +{ +	struct nv10_context *nv10 = nv10_context(pipe); +	struct pipe_winsys *ws = pipe->winsys; + +	assert(shader < PIPE_SHADER_TYPES); +	assert(index == 0); + +	if (buf) { +		void *mapped; +		if (buf->buffer && buf->buffer->size && +                    (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) +		{ +			memcpy(nv10->constbuf[shader], mapped, buf->buffer->size); +			nv10->constbuf_nr[shader] = +				buf->buffer->size / (4 * sizeof(float)); +			ws->buffer_unmap(ws, buf->buffer); +		} +	} +} + +static void +nv10_set_framebuffer_state(struct pipe_context *pipe, +			   const struct pipe_framebuffer_state *fb) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	nv10->framebuffer = (struct pipe_framebuffer_state*)fb; + +	nv10->dirty |= NV10_NEW_FRAMEBUFFER; +} + +static void +nv10_set_polygon_stipple(struct pipe_context *pipe, +			 const struct pipe_poly_stipple *stipple) +{ +	
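+	/* Stub: polygon stipple is not implemented yet, just complain. */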
NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv10_set_scissor_state(struct pipe_context *pipe, +		       const struct pipe_scissor_state *s) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	nv10->scissor = (struct pipe_scissor_state*)s; + +	nv10->dirty |= NV10_NEW_SCISSOR; +} + +static void +nv10_set_viewport_state(struct pipe_context *pipe, +			const struct pipe_viewport_state *vpt) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	nv10->viewport = (struct pipe_viewport_state*)vpt; + +	draw_set_viewport_state(nv10->draw, nv10->viewport); + +	nv10->dirty |= NV10_NEW_VIEWPORT; +} + +static void +nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count, +			const struct pipe_vertex_buffer *vb) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count); +	nv10->dirty |= NV10_NEW_VTXARRAYS; + +	draw_set_vertex_buffers(nv10->draw, count, vb); +} + +static void +nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count, +			 const struct pipe_vertex_element *ve) +{ +	struct nv10_context *nv10 = nv10_context(pipe); + +	memcpy(nv10->vtxelt, ve, sizeof(*ve) * count); +	nv10->dirty |= NV10_NEW_VTXARRAYS; + +	draw_set_vertex_elements(nv10->draw, count, ve); +} + +void +nv10_init_state_functions(struct nv10_context *nv10) +{ +	nv10->pipe.create_blend_state = nv10_blend_state_create; +	nv10->pipe.bind_blend_state = nv10_blend_state_bind; +	nv10->pipe.delete_blend_state = nv10_blend_state_delete; + +	nv10->pipe.create_sampler_state = nv10_sampler_state_create; +	nv10->pipe.bind_sampler_states = nv10_sampler_state_bind; +	nv10->pipe.delete_sampler_state = nv10_sampler_state_delete; +	nv10->pipe.set_sampler_textures = nv10_set_sampler_texture; + +	nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create; +	nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind; +	nv10->pipe.delete_rasterizer_state = nv10_rasterizer_state_delete; + +	nv10->pipe.create_depth_stencil_alpha_state = +		nv10_depth_stencil_alpha_state_create; +	nv10->pipe.bind_depth_stencil_alpha_state = +		nv10_depth_stencil_alpha_state_bind; +	nv10->pipe.delete_depth_stencil_alpha_state = +		nv10_depth_stencil_alpha_state_delete; + +	nv10->pipe.create_vs_state = nv10_vp_state_create; +	nv10->pipe.bind_vs_state = nv10_vp_state_bind; +	nv10->pipe.delete_vs_state = nv10_vp_state_delete; + +	nv10->pipe.create_fs_state = nv10_fp_state_create; +	nv10->pipe.bind_fs_state = nv10_fp_state_bind; +	nv10->pipe.delete_fs_state = nv10_fp_state_delete; + +	nv10->pipe.set_blend_color = nv10_set_blend_color; +	nv10->pipe.set_clip_state = nv10_set_clip_state; +	nv10->pipe.set_constant_buffer = nv10_set_constant_buffer; +	nv10->pipe.set_framebuffer_state = nv10_set_framebuffer_state; +	nv10->pipe.set_polygon_stipple = nv10_set_polygon_stipple; +	nv10->pipe.set_scissor_state = nv10_set_scissor_state; +	nv10->pipe.set_viewport_state = nv10_set_viewport_state; + +	nv10->pipe.set_vertex_buffers = nv10_set_vertex_buffers; +	nv10->pipe.set_vertex_elements = nv10_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h new file mode 100644 index 0000000000..3a3fd0d4f4 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -0,0 +1,139 @@ +#ifndef __NV10_STATE_H__ +#define __NV10_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv10_blend_state { +	uint32_t b_enable; +	uint32_t b_srcfunc; +	uint32_t b_dstfunc; + +	uint32_t c_mask; + +	uint32_t d_enable; +}; + +struct 
nv10_sampler_state { +	uint32_t wrap; +	uint32_t en; +	uint32_t filt; +	uint32_t bcol; +}; + +struct nv10_rasterizer_state { +	uint32_t shade_model; + +	uint32_t line_width; +	uint32_t line_smooth_en; + +	uint32_t point_size; + +	uint32_t poly_smooth_en; +	 +	uint32_t poly_mode_front; +	uint32_t poly_mode_back; + +	uint32_t front_face; +	uint32_t cull_face; +	uint32_t cull_face_en; + +	uint32_t point_sprite; + +	const struct pipe_rasterizer_state *templ; +}; + +struct nv10_vertex_program_exec { +	uint32_t data[4]; +	boolean has_branch_offset; +	int const_index; +}; + +struct nv10_vertex_program_data { +	int index; /* immediates == -1 */ +	float value[4]; +}; + +struct nv10_vertex_program { +	const struct pipe_shader_state *pipe; + +	boolean translated; +	struct nv10_vertex_program_exec *insns; +	unsigned nr_insns; +	struct nv10_vertex_program_data *consts; +	unsigned nr_consts; + +	struct nouveau_resource *exec; +	unsigned exec_start; +	struct nouveau_resource *data; +	unsigned data_start; +	unsigned data_start_min; + +	uint32_t ir; +	uint32_t or; +}; + +struct nv10_fragment_program_data { +	unsigned offset; +	unsigned index; +}; + +struct nv10_fragment_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; + +	boolean translated; +	boolean on_hw; +	unsigned samplers; + +	uint32_t *insn; +	int       insn_len; + +	struct nv10_fragment_program_data *consts; +	unsigned nr_consts; + +	struct pipe_buffer *buffer; + +	uint32_t fp_control; +	uint32_t fp_reg_control; +}; + + +struct nv10_depth_stencil_alpha_state { +	struct { +		uint32_t func; +		uint32_t write_enable; +		uint32_t test_enable; +	} depth; + +	struct { +		uint32_t enable; +		uint32_t wmask; +		uint32_t func; +		uint32_t ref; +		uint32_t vmask; +		uint32_t fail; +		uint32_t zfail; +		uint32_t zpass; +	} stencil; + +	struct { +		uint32_t enabled; +		uint32_t func; +		uint32_t ref; +	} alpha; +}; + +struct nv10_miptree { +	struct pipe_texture base; + +	struct pipe_buffer *buffer; +	uint total_size; + +	struct { +		uint pitch; +		uint *image_offset; +	} level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c new file mode 100644 index 0000000000..5dec618b93 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -0,0 +1,306 @@ +#include "nv10_context.h" +#include "nv10_state.h" + +static void nv10_state_emit_blend(struct nv10_context* nv10) +{ +	struct nv10_blend_state *b = nv10->blend; + +	BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); +	OUT_RING  (b->d_enable); + +	BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); +	OUT_RING  (b->b_enable); +	OUT_RING  (b->b_srcfunc); +	OUT_RING  (b->b_dstfunc); + +	BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); +	OUT_RING  (b->c_mask); +} + +static void nv10_state_emit_blend_color(struct nv10_context* nv10) +{ +	struct pipe_blend_color *c = nv10->blend_color; + +	BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); +	OUT_RING  ((float_to_ubyte(c->color[3]) << 24)| +		   (float_to_ubyte(c->color[0]) << 16)| +		   (float_to_ubyte(c->color[1]) << 8) | +		   (float_to_ubyte(c->color[2]) << 0)); +} + +static void nv10_state_emit_rast(struct nv10_context* nv10) +{ +	struct nv10_rasterizer_state *r = nv10->rast; + +	BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); +	OUT_RING  (r->shade_model); +	OUT_RING  (r->line_width); + + +	BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); +	OUT_RING  (r->point_size); + +	BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); +	OUT_RING  (r->poly_mode_front); +	OUT_RING  
(r->poly_mode_back); + + +	BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); +	OUT_RING  (r->cull_face); +	OUT_RING  (r->front_face); + +	BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); +	OUT_RING  (r->line_smooth_en); +	OUT_RING  (r->poly_smooth_en); + +	BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); +	OUT_RING  (r->cull_face_en); +} + +static void nv10_state_emit_dsa(struct nv10_context* nv10) +{ +	struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + +	BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); +	OUT_RING (d->depth.func); + +	BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); +	OUT_RING (d->depth.write_enable); + +	BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); +	OUT_RING (d->depth.test_enable); + +#if 0 +	BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); +	OUT_RING (d->stencil.enable); +	BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); +	OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); +#endif + +	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); +	OUT_RING (d->alpha.enabled); + +	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); +	OUT_RING (d->alpha.func); + +	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); +	OUT_RING (d->alpha.ref); +} + +static void nv10_state_emit_viewport(struct nv10_context* nv10) +{ +} + +static void nv10_state_emit_scissor(struct nv10_context* nv10) +{ +	// XXX this is so not working +/*	struct pipe_scissor_state *s = nv10->scissor; +	BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2); +	OUT_RING  (((s->maxx - s->minx) << 16) | s->minx); +	OUT_RING  (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void nv10_state_emit_framebuffer(struct nv10_context* nv10) +{ +	struct pipe_framebuffer_state* fb = nv10->framebuffer; +	struct pipe_surface *rt, *zeta = NULL; +	uint32_t rt_format, w, h; +	int colour_format = 0, zeta_format = 0; +        struct nv10_miptree *nv10mt = 0; + +	w = fb->cbufs[0]->width; +	h = fb->cbufs[0]->height; +	colour_format = fb->cbufs[0]->format; +	rt = fb->cbufs[0]; + +	if (fb->zsbuf) { +		if (colour_format) { +			assert(w == fb->zsbuf->width); +			assert(h == fb->zsbuf->height); +		} else { +			w = fb->zsbuf->width; +			h = fb->zsbuf->height; +		} + +		zeta_format = fb->zsbuf->format; +		zeta = fb->zsbuf; +	} + +	rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; + +	switch (colour_format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case 0: +		rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; +		break; +	case PIPE_FORMAT_R5G6B5_UNORM: +		rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5; +		break; +	default: +		assert(0); +	} + +	if (zeta) { +		BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); +		OUT_RING  (rt->stride | (zeta->stride << 16)); +	} else { +		BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); +		OUT_RING  (rt->stride | (rt->stride << 16)); +	} + +	nv10mt = (struct nv10_miptree *)rt->texture; +	nv10->rt[0] = nv10mt->buffer; + +	if (zeta_format) +	{ +		nv10mt = (struct nv10_miptree *)zeta->texture; +		nv10->zeta = nv10mt->buffer; +	} + +	BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); +	OUT_RING  ((w << 16) | 0); +	OUT_RING  ((h << 16) | 0); +	OUT_RING  (rt_format); +	BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); +	OUT_RING  (((w - 1) << 16) | 0 | 0x08000800); +	OUT_RING  (((h - 1) << 16) | 0 | 0x08000800); +} + +static void nv10_vertex_layout(struct nv10_context *nv10) +{ +	struct nv10_fragment_program *fp = nv10->fragprog.current; +	uint32_t src = 0; +	int i; +	struct vertex_info vinfo; + +	memset(&vinfo, 0, sizeof(vinfo)); + +	for (i = 0; i < fp->info.num_inputs; i++) { +		switch (fp->info.input_semantic_name[i]) { +			case TGSI_SEMANTIC_POSITION: +				
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); +				break; +			case TGSI_SEMANTIC_COLOR: +				draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); +				break; +			default: +			case TGSI_SEMANTIC_GENERIC: +				draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); +				break; +			case TGSI_SEMANTIC_FOG: +				draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); +				break; +		} +	} +	draw_compute_vertex_size(&vinfo); +} + +void +nv10_emit_hw_state(struct nv10_context *nv10) +{ +	int i; + +	if (nv10->dirty & NV10_NEW_VERTPROG) { +		//nv10_vertprog_bind(nv10, nv10->vertprog.current); +		nv10->dirty &= ~NV10_NEW_VERTPROG; +	} + +	if (nv10->dirty & NV10_NEW_FRAGPROG) { +		nv10_fragprog_bind(nv10, nv10->fragprog.current); +		/*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */ +		nv10->dirty_samplers |= (1<<10); +		nv10->dirty_samplers = 0; +	} + +	if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) { +		nv10_fragtex_bind(nv10); +		nv10->dirty &= ~NV10_NEW_FRAGPROG; +	} + +	if (nv10->dirty & NV10_NEW_VTXARRAYS) { +		nv10->dirty &= ~NV10_NEW_VTXARRAYS; +		nv10_vertex_layout(nv10); +		nv10_vtxbuf_bind(nv10); +	} + +	if (nv10->dirty & NV10_NEW_BLEND) { +		nv10->dirty &= ~NV10_NEW_BLEND; +		nv10_state_emit_blend(nv10); +	} + +	if (nv10->dirty & NV10_NEW_BLENDCOL) { +		nv10->dirty &= ~NV10_NEW_BLENDCOL; +		nv10_state_emit_blend_color(nv10); +	} + +	if (nv10->dirty & NV10_NEW_RAST) { +		nv10->dirty &= ~NV10_NEW_RAST; +		nv10_state_emit_rast(nv10); +	} + +	if (nv10->dirty & NV10_NEW_DSA) { +		nv10->dirty &= ~NV10_NEW_DSA; +		nv10_state_emit_dsa(nv10); +	} + + 	if (nv10->dirty & NV10_NEW_VIEWPORT) { +		nv10->dirty &= ~NV10_NEW_VIEWPORT; +		nv10_state_emit_viewport(nv10); +	} + + 	if (nv10->dirty & NV10_NEW_SCISSOR) { +		nv10->dirty &= ~NV10_NEW_SCISSOR; +		nv10_state_emit_scissor(nv10); +	} + + 	if (nv10->dirty & NV10_NEW_FRAMEBUFFER) { +		nv10->dirty &= ~NV10_NEW_FRAMEBUFFER; +		nv10_state_emit_framebuffer(nv10); +	} + +	/* Emit relocs for every referenced buffer. +	 * This is to ensure the bufmgr has an accurate idea of how +	 * the buffer is used.  This isn't very efficient, but we don't +	 * seem to take a significant performance hit.  Will be improved +	 * at some point.  
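+	 * (Each OUT_RELOC* call below records a relocation, so the buffer's
+	 * final GPU offset can be patched into the push buffer when the
+	 * buffers are validated.)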
Vertex arrays are emitted by nv10_vbo.c +	 */ + +	/* Render target */ +// XXX figre out who's who for NV10TCL_DMA_* and fill accordingly +//	BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); +//	OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); +	OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	if (nv10->zeta) { +// XXX +//		BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1); +//		OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +		BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); +		OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +		/* XXX for when we allocate LMA on nv17 */ +/*		BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); +		OUT_RELOCl(nv10->zeta + lma_offset);*/ +	} + +	/* Vertex buffer */ +	BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); +	OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); +	OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	/* Texture images */ +	for (i = 0; i < 2; i++) { +		if (!(nv10->fp_samplers & (1 << i))) +			continue; +		BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); +		OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | +			   NOUVEAU_BO_GART | NOUVEAU_BO_RD); +		BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); +		OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, +			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | +			   NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, +			   NV10TCL_TX_FORMAT_DMA1); +	} +} + diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c new file mode 100644 index 0000000000..2538151063 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#include "nv10_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv10_surface_copy(struct pipe_context *pipe, boolean do_flip, +		  struct pipe_surface *dest, unsigned destx, unsigned desty, +		  struct pipe_surface *src, unsigned srcx, unsigned srcy, +		  unsigned width, unsigned height) +{ +	struct nv10_context *nv10 = nv10_context(pipe); +	struct nv04_surface_2d *eng2d = nv10->screen->eng2d; + +	if (do_flip) { +		desty += height; +		while (height--) { +			eng2d->copy(eng2d, dest, destx, desty--, src, +				    srcx, srcy++, width, 1); +		} +		return; +	} + +	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +		  unsigned destx, unsigned desty, unsigned width, +		  unsigned height, unsigned value) +{ +	struct nv10_context *nv10 = nv10_context(pipe); +	struct nv04_surface_2d *eng2d = nv10->screen->eng2d; + +	eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv10_init_surface_functions(struct nv10_context *nv10) +{ +	nv10->pipe.surface_copy = nv10_surface_copy; +	nv10->pipe.surface_fill = nv10_surface_fill; +} diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c new file mode 100644 index 0000000000..d0e788ac03 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -0,0 +1,77 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv10_draw_elements( struct pipe_context *pipe, +                    struct pipe_buffer *indexBuffer, +                    unsigned indexSize, +                    unsigned prim, unsigned start, unsigned count) +{ +	struct nv10_context *nv10 = nv10_context( pipe ); +	struct draw_context *draw = nv10->draw; +	unsigned i; + +	nv10_emit_hw_state(nv10); + +	/* +	 * Map vertex buffers +	 */ +	for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { +		if (nv10->vtxbuf[i].buffer) { +			void *buf +				= pipe->winsys->buffer_map(pipe->winsys, +						nv10->vtxbuf[i].buffer, +						PIPE_BUFFER_USAGE_CPU_READ); +			draw_set_mapped_vertex_buffer(draw, i, buf); +		} +	} +	/* Map index buffer, if present */ +	if (indexBuffer) { +		void *mapped_indexes +			= pipe->winsys->buffer_map(pipe->winsys, indexBuffer, +					PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); +	} +	else { +		/* no index/element buffer */ +		draw_set_mapped_element_buffer(draw, 0, NULL); +	} + +	draw_set_mapped_constant_buffer(draw, +					nv10->constbuf[PIPE_SHADER_VERTEX], +					nv10->constbuf_nr[PIPE_SHADER_VERTEX]); + +	/* draw! 
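+	 * (Everything goes through the software draw module; the vbuf stage
+	 * installed in nv10_create() turns the results into hardware commands.)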
*/ +	draw_arrays(nv10->draw, prim, start, count); + +	/* +	 * unmap vertex/index buffers +	 */ +	for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { +		if (nv10->vtxbuf[i].buffer) { +			pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); +			draw_set_mapped_vertex_buffer(draw, i, NULL); +		} +	} +	if (indexBuffer) { +		pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); +		draw_set_mapped_element_buffer(draw, 0, NULL); +	} + +	return TRUE; +} + +boolean nv10_draw_arrays( struct pipe_context *pipe, +				 unsigned prim, unsigned start, unsigned count) +{ +	return nv10_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv20/Makefile b/src/gallium/drivers/nv20/Makefile new file mode 100644 index 0000000000..d777fd3d8b --- /dev/null +++ b/src/gallium/drivers/nv20/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv20 + +DRIVER_SOURCES = \ +	nv20_clear.c \ +	nv20_context.c \ +	nv20_fragprog.c \ +	nv20_fragtex.c \ +	nv20_miptree.c \ +	nv20_prim_vbuf.c \ +	nv20_screen.c \ +	nv20_state.c \ +	nv20_state_emit.c \ +	nv20_surface.c \ +	nv20_vbo.c +#	nv20_vertprog.c + +C_SOURCES = \ +	$(COMMON_SOURCES) \ +	$(DRIVER_SOURCES) + +ASM_SOURCES =  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv20/nv20_clear.c b/src/gallium/drivers/nv20/nv20_clear.c new file mode 100644 index 0000000000..29f4afd87c --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv20_context.h" + +void +nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +	pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +	ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c new file mode 100644 index 0000000000..1659aec8fa --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -0,0 +1,419 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static void +nv20_flush(struct pipe_context *pipe, unsigned flags, +	   struct pipe_fence_handle **fence) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	draw_flush(nv20->draw); + +	FIRE_RING(fence); +} + +static void +nv20_destroy(struct pipe_context *pipe) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	if (nv20->draw) +		draw_destroy(nv20->draw); + +	FREE(nv20); +} + +static void nv20_init_hwctx(struct nv20_context *nv20) +{ +	struct nv20_screen *screen = nv20->screen; +	struct nouveau_winsys *nvws = screen->nvws; +	int i; +	float projectionmatrix[16]; +	const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + +	BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); +	OUT_RING  (screen->sync->handle); +	BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); +	OUT_RING  (nvws->channel->vram->handle); +	OUT_RING  (nvws->channel->gart->handle); /* TEXTURE1 */ +	BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); +	OUT_RING  (nvws->channel->vram->handle); +	OUT_RING  (nvws->channel->vram->handle); /* ZETA */ + +	BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); +	OUT_RING  (0); /* renouveau: beef0351, unique */ + +	BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); +	OUT_RING  (0); +	OUT_RING  (0); + +	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); +	OUT_RING  ((0xfff << 16) | 0x0); +	BEGIN_RING(kelvin, 
NV20TCL_VIEWPORT_CLIP_VERT(0), 1); +	OUT_RING  ((0xfff << 16) | 0x0); + +	for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { +		BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); +		OUT_RING  (0); +		BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); +		OUT_RING  (0); +	} + +	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); +	OUT_RING  (0); + +	BEGIN_RING(kelvin, 0x17e0, 3); +	OUT_RINGf (0.0); +	OUT_RINGf (0.0); +	OUT_RINGf (1.0); + +	if (is_nv25tcl) { +		BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); +		OUT_RING  (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); +	} else { +		BEGIN_RING(kelvin, 0x1e68, 1); +		OUT_RING  (0x4b800000); /* 16777216.000000 */ +		BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); +		OUT_RING  (NV20TCL_TX_RCOMP_LEQUAL); +	} + +	BEGIN_RING(kelvin, 0x290, 1); +	OUT_RING  ((0x10 << 16) | 1); +	BEGIN_RING(kelvin, 0x9fc, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, 0x1d80, 1); +	OUT_RING  (1); +	BEGIN_RING(kelvin, 0x9f8, 1); +	OUT_RING  (4); +	BEGIN_RING(kelvin, 0x17ec, 3); +	OUT_RINGf (0.0); +	OUT_RINGf (1.0); +	OUT_RINGf (0.0); + +	if (is_nv25tcl) { +		BEGIN_RING(kelvin, 0x1d88, 1); +		OUT_RING  (3); + +		BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); +		OUT_RING  (nvws->channel->vram->handle); +		BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); +		OUT_RING  (nvws->channel->vram->handle); +	} +	BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); +	OUT_RING  (0);	/* renouveau: beef1e10 */ + +	BEGIN_RING(kelvin, 0x1e98, 1); +	OUT_RING  (0); +#if 0 +	if (is_nv25tcl) { +		BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); +		OUT_RING  (NvDmaTT);	/* renouveau: beef0202 */ +		OUT_RING  (NvDmaFB);	/* renouveau: beef0201 */ + +		BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); +		OUT_RING  (NvDmaTT);	/* renouveau: beef0202 */ +	} +#endif +	BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); +	OUT_RING  (0); + +	BEGIN_RING(kelvin, 0x120, 3); +	OUT_RING  (0); +	OUT_RING  (1); +	OUT_RING  (2); + +/* error: ILLEGAL_MTHD, PROTECTION_FAULT +	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); +	OUT_RINGf (0.0); +	OUT_RINGf (512.0); +	OUT_RINGf (0.0); +	OUT_RINGf (0.0); +*/ + +	if (is_nv25tcl) { +		BEGIN_RING(kelvin, 0x022c, 2); +		OUT_RING  (0x280); +		OUT_RING  (0x07d28000); +	} + +/* * illegal method, protection fault +	BEGIN_RING(NvSub3D, 0x1c2c, 1); +	OUT_RING  (0); */ + +	if (is_nv25tcl) { +		BEGIN_RING(kelvin, 0x1da4, 1); +		OUT_RING  (0); +	} + +/* * crashes with illegal method, protection fault +	BEGIN_RING(NvSub3D, 0x1c18, 1); +	OUT_RING  (0x200); */ + +	BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); +	OUT_RING  ((0 << 16) | 0); +	OUT_RING  ((0 << 16) | 0); + +	/* *** Set state *** */ + +	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); +	OUT_RING  (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); +	OUT_RING  (0);			/* NV20TCL_ALPHA_FUNC_REF */ + +	for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { +		BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); +		OUT_RING  (0); +	} +	BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); +	OUT_RING  (0x30d410d0); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); +	OUT_RING  (0x00000c00); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); +	OUT_RING  (0x00011101); +	BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); +	OUT_RING  (0x130e0300); +	OUT_RING  (0x0c091c80); +	BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); +	OUT_RING  
(0x00000c00); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); +	OUT_RING  (0x20c400c0); +	OUT_RING  (0); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); +	OUT_RING  (0); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); +	OUT_RING  (0x035125a0); +	OUT_RING  (0); +	OUT_RING  (0x40002000); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); +	OUT_RING  (0xffff0000); + +	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); +	OUT_RING  (NV20TCL_BLEND_FUNC_SRC_ONE); +	OUT_RING  (NV20TCL_BLEND_FUNC_DST_ZERO); +	OUT_RING  (0);			/* NV20TCL_BLEND_COLOR */ +	OUT_RING  (NV20TCL_BLEND_EQUATION_FUNC_ADD); +	BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); +	OUT_RING  (0xff); +	OUT_RING  (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); +	OUT_RING  (0);			/* NV20TCL_STENCIL_FUNC_REF */ +	OUT_RING  (0xff);		/* NV20TCL_STENCIL_FUNC_MASK */ +	OUT_RING  (NV20TCL_STENCIL_OP_FAIL_KEEP); +	OUT_RING  (NV20TCL_STENCIL_OP_ZFAIL_KEEP); +	OUT_RING  (NV20TCL_STENCIL_OP_ZPASS_KEEP); + +	BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); +	OUT_RING  (0); +	OUT_RING  (NV20TCL_COLOR_LOGIC_OP_OP_COPY); +	BEGIN_RING(kelvin, 0x17cc, 1); +	OUT_RING  (0); +	if (is_nv25tcl) { +		BEGIN_RING(kelvin, 0x1d84, 1); +		OUT_RING  (1); +	} +	BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); +	OUT_RING  (0x00020000); +	BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0), +					NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); +	for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { +		OUT_RING(0xffffffff); +	} + +	BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); +	OUT_RING  (0); +	OUT_RING  (0);		/* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ +	OUT_RING  (0);		/* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ +	BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); +	OUT_RING  (NV20TCL_DEPTH_FUNC_LESS); +	BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); +	OUT_RINGf (0.0); +	OUT_RINGf (0.0);	/* NV20TCL.POLYGON_OFFSET_UNITS */ +	BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); +	OUT_RING  (1); +	if (!is_nv25tcl) { +		BEGIN_RING(kelvin, 0x1d84, 1); +		OUT_RING  (3); +	} +	BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); +	if (!is_nv25tcl) { +		OUT_RING  (8); +	} else { +		OUT_RINGf (1.0); +	} +	if (!is_nv25tcl) { +		BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); +		OUT_RING  (0); +		OUT_RING  (0);		/* NV20TCL.POINT_SMOOTH_ENABLE */ +	} else { +		BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); +		OUT_RING  (0); +		BEGIN_RING(kelvin, 0x0a1c, 1); +		OUT_RING  (0x800); +	} +	BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); +	OUT_RING  (8); +	BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); +	OUT_RING  (NV20TCL_POLYGON_MODE_FRONT_FILL); +	OUT_RING  (NV20TCL_POLYGON_MODE_BACK_FILL); +	BEGIN_RING(kelvin, 
NV20TCL_CULL_FACE, 2); +	OUT_RING  (NV20TCL_CULL_FACE_BACK); +	OUT_RING  (NV20TCL_FRONT_FACE_CCW); +	BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); +	OUT_RING  (NV20TCL_SHADE_MODEL_SMOOTH); +	BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); +	OUT_RING  (0); +	BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE); +	for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { +		OUT_RING(0); +	} +	BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); +	OUT_RINGf (1.5); +	OUT_RINGf (-0.090168);		/* NV20TCL.FOG_EQUATION_LINEAR */ +	OUT_RINGf (0.0);		/* NV20TCL.FOG_EQUATION_QUADRATIC */ +	BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); +	OUT_RING  (NV20TCL_FOG_MODE_EXP_2); +	OUT_RING  (NV20TCL_FOG_COORD_DIST_COORD_FOG); +	BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); +	OUT_RING  (0); +	OUT_RING  (0);			/* NV20TCL.FOG_COLOR */ +	BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); +	OUT_RING  (NV20TCL_ENGINE_FIXED); + +	for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { +		BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); +		OUT_RING  (0); +	} + +	BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); +	OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); +	OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); +	OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); +	for (i = 4; i < 16; ++i) { +		OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0);	OUT_RINGf(1.0); +	} + +	BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); +	OUT_RING  (1); +	BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); +	OUT_RING (0x00010101); +	BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); +	OUT_RING (0); + +	memset(projectionmatrix, 0, sizeof(projectionmatrix)); +	projectionmatrix[0*4+0] = 1.0; +	projectionmatrix[1*4+1] = 1.0; +	projectionmatrix[2*4+2] = 16777215.0; +	projectionmatrix[3*4+3] = 1.0; +	BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); +	for (i = 0; i < 16; i++) { +		OUT_RINGf  (projectionmatrix[i]); +	} + +	BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); +	OUT_RINGf (0.0); +	OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ + +	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4); +	OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ +	OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ +	OUT_RINGf (0.0); +	OUT_RINGf (16777215.0); + +	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4); +	OUT_RINGf (0.0); /* no effect?, w/2 */ +	OUT_RINGf (0.0); /* no effect?, h/2 */ +	OUT_RINGf (16777215.0 * 0.5); +	OUT_RINGf (65535.0); + +	FIRE_RING (NULL); +} + +static void +nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ +	struct nv20_screen *screen = nv20_screen(pscreen); +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv20_context *nv20; +	struct nouveau_winsys *nvws = screen->nvws; + +	nv20 = CALLOC(1, sizeof(struct nv20_context)); +	if (!nv20) +		return NULL; +	nv20->screen = screen; +	nv20->pctx_id = pctx_id; + +	nv20->nvws = nvws; + +	nv20->pipe.winsys = ws; +	nv20->pipe.screen = pscreen; +	nv20->pipe.destroy = nv20_destroy; +	nv20->pipe.set_edgeflags = nv20_set_edgeflags; +	nv20->pipe.draw_arrays = nv20_draw_arrays; +	nv20->pipe.draw_elements = nv20_draw_elements; +	nv20->pipe.clear = nv20_clear; +	nv20->pipe.flush = nv20_flush; + +	nv20_init_surface_functions(nv20); +	nv20_init_state_functions(nv20); + +	nv20->draw = draw_create(); +	
assert(nv20->draw); +	draw_set_rasterize_stage(nv20->draw, nv20_draw_vbuf_stage(nv20)); + +	nv20_init_hwctx(nv20); + +	return &nv20->pipe; +} + diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h new file mode 100644 index 0000000000..8ad926db20 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -0,0 +1,153 @@ +#ifndef __NV20_CONTEXT_H__ +#define __NV20_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \ +	struct nv20_screen *ctx = nv20->screen +#include "nouveau/nouveau_push.h" + +#include "nv20_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ +	fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ +	fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV20_NEW_VERTPROG	(1 << 0) +#define NV20_NEW_FRAGPROG	(1 << 1) +#define NV20_NEW_VTXARRAYS	(1 << 2) +#define NV20_NEW_BLEND		(1 << 3) +#define NV20_NEW_BLENDCOL	(1 << 4) +#define NV20_NEW_RAST 		(1 << 5) +#define NV20_NEW_DSA  		(1 << 6) +#define NV20_NEW_VIEWPORT	(1 << 7) +#define NV20_NEW_SCISSOR	(1 << 8) +#define NV20_NEW_FRAMEBUFFER	(1 << 9) + +#include "nv20_screen.h" + +struct nv20_context { +	struct pipe_context pipe; + +	struct nouveau_winsys *nvws; +	struct nv20_screen *screen; +	unsigned pctx_id; + +	struct draw_context *draw; + +	uint32_t dirty; + +	struct nv20_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; +	struct nv20_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; +	unsigned dirty_samplers; +	unsigned fp_samplers; +	unsigned vp_samplers; + +	uint32_t rt_enable; +	struct pipe_buffer *rt[4]; +	struct pipe_buffer *zeta; +	uint32_t lma_offset; + +	struct nv20_blend_state *blend; +	struct pipe_blend_color *blend_color; +	struct nv20_rasterizer_state *rast; +	struct nv20_depth_stencil_alpha_state *dsa; +	struct pipe_viewport_state *viewport; +	struct pipe_scissor_state *scissor; +	struct pipe_framebuffer_state *framebuffer; + +	//struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; +	float *constbuf[PIPE_SHADER_TYPES][32][4]; +	unsigned constbuf_nr[PIPE_SHADER_TYPES]; + +	struct vertex_info vertex_info; + +	struct { +		struct pipe_buffer *buffer; +		uint32_t format; +	} tex[2]; + +	unsigned vb_enable; +	struct { +		struct pipe_buffer *buffer; +		unsigned delta; +	} vb[16]; + +/*	struct { +	 +		struct nouveau_resource *exec_heap; +		struct nouveau_resource *data_heap; + +		struct nv20_vertex_program *active; + +		struct nv20_vertex_program *current; +	} vertprog; +*/ +	struct { +		struct nv20_fragment_program *active; + +		struct nv20_fragment_program *current; +		struct pipe_buffer *constant_buf; +	} fragprog; + +	struct pipe_vertex_buffer  vtxbuf[PIPE_MAX_ATTRIBS]; +	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +}; + +static INLINE struct nv20_context * +nv20_context(struct pipe_context *pipe) +{ +	return (struct nv20_context *)pipe; +} + +extern void nv20_init_state_functions(struct nv20_context *nv20); +extern void nv20_init_surface_functions(struct nv20_context *nv20); + +extern void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv20_clear.c */ +extern void nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		       unsigned clearValue); + +/* nv20_draw.c */ +extern struct 
draw_stage *nv20_draw_render_stage(struct nv20_context *nv20); + +/* nv20_fragprog.c */ +extern void nv20_fragprog_bind(struct nv20_context *, +			       struct nv20_fragment_program *); +extern void nv20_fragprog_destroy(struct nv20_context *, +				  struct nv20_fragment_program *); + +/* nv20_fragtex.c */ +extern void nv20_fragtex_bind(struct nv20_context *); + +/* nv20_prim_vbuf.c */ +struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ); +extern void nv20_vtxbuf_bind(struct nv20_context* nv20); + +/* nv20_state.c and friends */ +extern void nv20_emit_hw_state(struct nv20_context *nv20); +extern void nv20_state_tex_update(struct nv20_context *nv20); + +/* nv20_vbo.c */ +extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, +				unsigned start, unsigned count); +extern boolean nv20_draw_elements( struct pipe_context *pipe, +                    struct pipe_buffer *indexBuffer, +                    unsigned indexSize, +                    unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv20/nv20_fragprog.c b/src/gallium/drivers/nv20/nv20_fragprog.c new file mode 100644 index 0000000000..4f496369dd --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv20_context.h" + +void +nv20_fragprog_bind(struct nv20_context *nv20, struct nv20_fragment_program *fp) +{ +} + +void +nv20_fragprog_destroy(struct nv20_context *nv20, +		      struct nv20_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c new file mode 100644 index 0000000000..495a7be912 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -0,0 +1,124 @@ +#include "nv20_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf)                                                                \ +{                                                                              \ +  TRUE,                                                                        \ +  PIPE_FORMAT_##m,                                                             \ +  NV20TCL_TX_FORMAT_FORMAT_##tf,                                               \ +} + +struct nv20_texture_format { +	boolean defined; +	uint	pipe; +	int     format; +}; + +static struct nv20_texture_format +nv20_texture_formats[] = { +	_(A8R8G8B8_UNORM, A8R8G8B8), +	_(A1R5G5B5_UNORM, A1R5G5B5), +	_(A4R4G4B4_UNORM, A4R4G4B4), +	_(L8_UNORM      , L8      ), +	_(A8_UNORM      , A8      ), +	_(A8L8_UNORM    , A8L8    ), +/*	_(RGB_DXT1      , DXT1,   ), */ +/*	_(RGBA_DXT1     , DXT1,   ), */ +/*	_(RGBA_DXT3     , DXT3,   ), */ +/*	_(RGBA_DXT5     , DXT5,   ), */ +	{}, +}; + +static struct nv20_texture_format * +nv20_fragtex_format(uint pipe_format) +{ +	struct nv20_texture_format *tf = nv20_texture_formats; + +	while (tf->defined) { +		if (tf->pipe == pipe_format) +			return tf; +		tf++; +	} + +	return NULL; +} + + +static void +nv20_fragtex_build(struct nv20_context *nv20, int unit) +{ +#if 0 +	struct nv20_sampler_state *ps = nv20->tex_sampler[unit]; +	struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; +	struct pipe_texture *pt = &nv20mt->base; +	struct nv20_texture_format *tf; +	uint32_t txf, txs, txp; + +	tf = nv20_fragtex_format(pt->format); +	if (!tf || !tf->defined) { +		NOUVEAU_ERR("Unsupported texture format: 0x%x\n", 
pt->format); +		return; +	} + +	txf  = tf->format << 8; +	txf |= (pt->last_level + 1) << 16; +	txf |= log2i(pt->width[0]) << 20; +	txf |= log2i(pt->height[0]) << 24; +	txf |= log2i(pt->depth[0]) << 28; +	txf |= 8; + +	switch (pt->target) { +	case PIPE_TEXTURE_CUBE: +		txf |= NV10TCL_TX_FORMAT_CUBE_MAP; +		/* fall-through */ +	case PIPE_TEXTURE_2D: +		txf |= (2<<4); +		break; +	case PIPE_TEXTURE_1D: +		txf |= (1<<4); +		break; +	default: +		NOUVEAU_ERR("Unknown target %d\n", pt->target); +		return; +	} + +	BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); +	OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); +	OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); +	OUT_RING  (ps->wrap); +	OUT_RING  (0x40000000); /* enable */ +	OUT_RING  (txs); +	OUT_RING  (ps->filt | 0x2000 /* magic */); +	OUT_RING  ((pt->width[0] << 16) | pt->height[0]); +	OUT_RING  (ps->bcol); +#endif +} + +void +nv20_fragtex_bind(struct nv20_context *nv20) +{ +#if 0 +	struct nv20_fragment_program *fp = nv20->fragprog.active; +	unsigned samplers, unit; + +	samplers = nv20->fp_samplers & ~fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); +		OUT_RING  (0); +	} + +	samplers = nv20->dirty_samplers & fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		nv20_fragtex_build(nv20, unit); +	} + +	nv20->fp_samplers = fp->samplers; +#endif +} + diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c new file mode 100644 index 0000000000..ef7e9c5428 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -0,0 +1,206 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static void +nv20_miptree_layout(struct nv20_miptree *nv20mt) +{ +	struct pipe_texture *pt = &nv20mt->base; +	boolean swizzled = FALSE; +	uint width = pt->width[0], height = pt->height[0]; +	uint offset = 0; +	int nr_faces, l, f; + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		nr_faces = 6; +	} else { +		nr_faces = 1; +	} +	 +	for (l = 0; l <= pt->last_level; l++) { +		pt->width[l] = width; +		pt->height[l] = height; +		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); +		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + +		if (swizzled) +			nv20mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; +		else +			nv20mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; +		nv20mt->level[l].pitch = (nv20mt->level[l].pitch + 63) & ~63; + +		nv20mt->level[l].image_offset = +			CALLOC(nr_faces, sizeof(unsigned)); + +		width  = MAX2(1, width  >> 1); +		height = MAX2(1, height >> 1); + +	} + +	for (f = 0; f < nr_faces; f++) { +		for (l = 0; l <= pt->last_level; l++) { +			nv20mt->level[l].image_offset[f] = offset; +			offset += nv20mt->level[l].pitch * pt->height[l]; +		} +	} + +	nv20mt->total_size = offset; +} + +static struct pipe_texture * +nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +		     const unsigned *stride, struct pipe_buffer *pb) +{ +	struct nv20_miptree *mt; + +	/* Only supports 2D, non-mipmapped textures for the moment */ +	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || +	    pt->depth[0] != 1) +		return NULL; + +	mt = CALLOC_STRUCT(nv20_miptree); +	if (!mt) +		return NULL; + +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = 
pscreen; +	mt->level[0].pitch = stride[0]; +	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + +	pipe_buffer_reference(pscreen, &mt->buffer, pb); +	return &mt->base; +} + +static struct pipe_texture * +nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ +	struct pipe_winsys *ws = screen->winsys; +	struct nv20_miptree *mt; +	unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | +	                     NOUVEAU_BUFFER_USAGE_TEXTURE; + +	mt = MALLOC(sizeof(struct nv20_miptree)); +	if (!mt) +		return NULL; +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = screen; + +	/* Swizzled textures must be POT */ +	if (pt->width[0] & (pt->width[0] - 1) || +	    pt->height[0] & (pt->height[0] - 1)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | +	                     PIPE_TEXTURE_USAGE_DISPLAY_TARGET)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else { +		switch (pt->format) { +		/* TODO: Figure out which formats can be swizzled */ +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_X8R8G8B8_UNORM: +		case PIPE_FORMAT_R16_SNORM: +			break; +		default: +			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +		} +	} + +	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) +		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + +	nv20_miptree_layout(mt); + +	mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); +	if (!mt->buffer) { +		FREE(mt); +		return NULL; +	} +	 +	return &mt->base; +} + +static void +nv20_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ +	struct pipe_texture *mt = *pt; + +	*pt = NULL; +	if (--mt->refcount <= 0) { +		struct nv20_miptree *nv20mt = (struct nv20_miptree *)mt; +		int l; + +		pipe_buffer_reference(screen, &nv20mt->buffer, NULL); +		for (l = 0; l <= mt->last_level; l++) { +			if (nv20mt->level[l].image_offset) +				FREE(nv20mt->level[l].image_offset); +		} +		FREE(nv20mt); +	} +} + +static struct pipe_surface * +nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, +			 unsigned face, unsigned level, unsigned zslice, +			 unsigned flags) +{ +	struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt; +	struct pipe_surface *ps; + +	ps = CALLOC_STRUCT(pipe_surface); +	if (!ps) +		return NULL; +	pipe_texture_reference(&ps->texture, pt); +	ps->format = pt->format; +	ps->width = pt->width[level]; +	ps->height = pt->height[level]; +	ps->block = pt->block; +	ps->nblocksx = pt->nblocksx[level]; +	ps->nblocksy = pt->nblocksy[level]; +	ps->stride = nv20mt->level[level].pitch; +	ps->usage = flags; +	ps->status = PIPE_SURFACE_STATUS_DEFINED; +	ps->refcount = 1; + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		ps->offset = nv20mt->level[level].image_offset[face]; +	} else +	if (pt->target == PIPE_TEXTURE_3D) { +		ps->offset = nv20mt->level[level].image_offset[zslice]; +	} else { +		ps->offset = nv20mt->level[level].image_offset[0]; +	} + +	return ps; +} + +static void +nv20_miptree_surface_release(struct pipe_screen *pscreen, +			     struct pipe_surface **psurface) +{ +	struct pipe_surface *ps = *psurface; + +	*psurface = NULL; +	if (--ps->refcount > 0) +		return; + +	pipe_texture_reference(&ps->texture, NULL); +	FREE(ps); +} + +void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ +	pscreen->texture_create = nv20_miptree_create; +	pscreen->texture_blanket = nv20_miptree_blanket; +	
pscreen->texture_release = nv20_miptree_release; +	pscreen->get_tex_surface = nv20_miptree_surface_get; +	pscreen->tex_surface_release = nv20_miptree_surface_release; +} + diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c new file mode 100644 index 0000000000..4dd7052814 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -0,0 +1,402 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress  + *  + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +#include "draw/draw_vbuf.h" + +/** + * Primitive renderer for nv20. + */ +struct nv20_vbuf_render { +	struct vbuf_render base; + +	struct nv20_context *nv20;    + +	/** Vertex buffer in VRAM */ +	struct pipe_buffer *pbuffer; + +	/** Vertex buffer in normal memory */ +	void *mbuffer; + +	/** Vertex size in bytes */ +	/*unsigned vertex_size;*/ + +	/** Hardware primitive */ +	unsigned hwprim; +}; + +/** + * Basically a cast wrapper. 
+ */ +static INLINE struct nv20_vbuf_render * +nv20_vbuf_render(struct vbuf_render *render) +{ +	assert(render); +	return (struct nv20_vbuf_render *)render; +} + +void nv20_vtxbuf_bind( struct nv20_context* nv20 ) +{ +#if 0 +	int i; +	for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { +		BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); +		OUT_RING(0/*nv20->vtxbuf*/); +		BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); +		OUT_RING(0/*XXX*/); +	} +#endif +} + +static const struct vertex_info * +nv20_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ +	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); +	struct nv20_context *nv20 = nv20_render->nv20; + +	nv20_emit_hw_state(nv20); + +	return &nv20->vertex_info; +} + +static void * +nv20__allocate_mbuffer(struct nv20_vbuf_render *nv20_render, size_t size) +{ +	nv20_render->mbuffer = MALLOC(size); +	return nv20_render->mbuffer; +} + +static void * +nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size) +{ +	struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys; +	nv20_render->pbuffer = winsys->buffer_create(winsys, 64, +					PIPE_BUFFER_USAGE_VERTEX, size); +	return winsys->buffer_map(winsys, +			nv20_render->pbuffer, +			PIPE_BUFFER_USAGE_CPU_WRITE); +} + +static void * +nv20_vbuf_render_allocate_vertices( struct vbuf_render *render, +		ushort vertex_size, +		ushort nr_vertices ) +{ +	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); +	size_t size = (size_t)vertex_size * (size_t)nr_vertices; +	void *buf; + +	assert(!nv20_render->pbuffer); +	assert(!nv20_render->mbuffer); + +	/* +	 * For small amount of vertices, don't bother with pipe vertex +	 * buffer, the data will be passed directly via the fifo. +	 */ +	/* XXX: Pipe vertex buffers don't work. 
*/ +	if (0 && size > 16 * 1024) +		buf = nv20__allocate_pbuffer(nv20_render, size); +	else +		buf = nv20__allocate_mbuffer(nv20_render, size); + +	if (buf) +		nv20_render->nv20->dirty |= NV20_NEW_VTXARRAYS; + +	return buf; +} + +static boolean +nv20_vbuf_render_set_primitive( struct vbuf_render *render,  +		unsigned prim ) +{ +	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); +	unsigned hwp = nvgl_primitive(prim); +	if (hwp == 0) +		return FALSE; + +	nv20_render->hwprim = hwp; +	return TRUE; +} + +static uint32_t +nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) +{ +	return (stride << NV20TCL_VTXFMT_STRIDE_SHIFT) | +		(fields << NV20TCL_VTXFMT_SIZE_SHIFT) | +		(type << NV20TCL_VTXFMT_TYPE_SHIFT); +} + +static unsigned +nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) +{ +	uint32_t hwfmt = 0; +	unsigned fields; + +	switch (type) { +	case EMIT_OMIT: +		hwfmt = nv20__vtxhwformat(0, 0, 2); +		fields = 0; +		break; +	case EMIT_1F: +		hwfmt = nv20__vtxhwformat(4, 1, 2); +		fields = 1; +		break; +	case EMIT_2F: +		hwfmt = nv20__vtxhwformat(8, 2, 2); +		fields = 2; +		break; +	case EMIT_3F: +		hwfmt = nv20__vtxhwformat(12, 3, 2); +		fields = 3; +		break; +	case EMIT_4F: +		hwfmt = nv20__vtxhwformat(16, 4, 2); +		fields = 4; +		break; +	default: +		NOUVEAU_ERR("unhandled attrib_emit %d\n", type); +		return 0; +	} + +	BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1); +	OUT_RING(hwfmt); +	return fields; +} + +static unsigned +nv20__emit_vertex_array_format(struct nv20_context *nv20) +{ +	struct vertex_info *vinfo = &nv20->vertex_info; +	int hwattr = NV20TCL_VTXFMT__SIZE; +	int attr = 0; +	unsigned nr_fields = 0; + +	while (hwattr-- > 0) { +		if (vinfo->hwfmt[0] & (1 << hwattr)) { +			nr_fields += nv20__emit_format(nv20, +					vinfo->attrib[attr].emit, hwattr); +			attr++; +		} else +			nv20__emit_format(nv20, EMIT_OMIT, hwattr); +	} + +	return nr_fields; +} + +static void +nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, +		const ushort *indices, +		uint nr_indices) +{ +	struct nv20_context *nv20 = nv20_render->nv20; +	struct vertex_info *vinfo = &nv20->vertex_info; +	unsigned nr_fields; +	int max_push; +	ubyte *data = nv20_render->mbuffer; +	int vsz = 4 * vinfo->size; + +	nr_fields = nv20__emit_vertex_array_format(nv20); + +	BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); +	OUT_RING(nv20_render->hwprim); + +	max_push = 1200 / nr_fields; +	while (nr_indices) { +		int i; +		int push = MIN2(nr_indices, max_push); + +		BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); +		for (i = 0; i < push; i++) { +			/* XXX: fixme to handle other than floats? 
*/ +			int f = nr_fields; +			float *attrv = (float*)&data[indices[i] * vsz]; +			while (f-- > 0) +				OUT_RINGf(*attrv++); +		} + +		nr_indices -= push; +		indices += push; +	} + +	BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); +	OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); +} + +static void +nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, +		const ushort *indices, +		uint nr_indices) +{ +	struct nv20_context *nv20 = nv20_render->nv20; +	int push, i; + +	NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); + +	BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); +	OUT_RELOCl(nv20_render->pbuffer, 0, +			NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + +	BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); +	OUT_RING(nv20_render->hwprim); + +	if (nr_indices & 1) { +		BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); +		OUT_RING  (indices[0]); +		indices++; nr_indices--; +	} + +	while (nr_indices) { +		// XXX too big/small ? check the size +		push = MIN2(nr_indices, 1200 * 2); + +		BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); +		for (i = 0; i < push; i+=2) +			OUT_RING((indices[i+1] << 16) | indices[i]); + +		nr_indices -= push; +		indices  += push; +	} + +	BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); +	OUT_RING  (0); +} + +static void +nv20_vbuf_render_draw( struct vbuf_render *render, +		const ushort *indices, +		uint nr_indices) +{ +	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + +	nv20_emit_hw_state(nv20_render->nv20); + +	if (nv20_render->pbuffer) +		nv20__draw_pbuffer(nv20_render, indices, nr_indices); +	else if (nv20_render->mbuffer) +		nv20__draw_mbuffer(nv20_render, indices, nr_indices); +	else +		assert(0); +} + + +static void +nv20_vbuf_render_release_vertices( struct vbuf_render *render, +		void *vertices,  +		unsigned vertex_size, +		unsigned vertices_used ) +{ +	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); +	struct nv20_context *nv20 = nv20_render->nv20; +	struct pipe_winsys *winsys = nv20->pipe.winsys; +	struct pipe_screen *pscreen = &nv20->screen->pipe; + +	if (nv20_render->pbuffer) { +		winsys->buffer_unmap(winsys, nv20_render->pbuffer); +		pipe_buffer_reference(pscreen, &nv20_render->pbuffer, NULL); +	} else if (nv20_render->mbuffer) { +		FREE(nv20_render->mbuffer); +		nv20_render->mbuffer = NULL; +	} else +		assert(0); +} + + +static void +nv20_vbuf_render_destroy( struct vbuf_render *render ) +{ +	struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + +	assert(!nv20_render->pbuffer); +	assert(!nv20_render->mbuffer); + +	FREE(nv20_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +nv20_vbuf_render_create( struct nv20_context *nv20 ) +{ +	struct nv20_vbuf_render *nv20_render = CALLOC_STRUCT(nv20_vbuf_render); + +	nv20_render->nv20 = nv20; + +	nv20_render->base.max_vertex_buffer_bytes = 16*1024; +	nv20_render->base.max_indices = 1024; +	nv20_render->base.get_vertex_info = nv20_vbuf_render_get_vertex_info; +	nv20_render->base.allocate_vertices = +					nv20_vbuf_render_allocate_vertices; +	nv20_render->base.set_primitive = nv20_vbuf_render_set_primitive; +	nv20_render->base.draw = nv20_vbuf_render_draw; +	nv20_render->base.release_vertices = nv20_vbuf_render_release_vertices; +	nv20_render->base.destroy = nv20_vbuf_render_destroy; + +	return &nv20_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. 
+ */ +struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ) +{ +	struct vbuf_render *render; +	struct draw_stage *stage; + +	render = nv20_vbuf_render_create(nv20); +	if(!render) +		return NULL; + +	stage = draw_vbuf_stage( nv20->draw, render ); +	if(!stage) { +		render->destroy(render); +		return NULL; +	} + +	return stage; +} diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c new file mode 100644 index 0000000000..5f2b7b4f71 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -0,0 +1,222 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static const char * +nv20_screen_get_name(struct pipe_screen *screen) +{ +	struct nv20_screen *nv20screen = nv20_screen(screen); +	struct nouveau_device *dev = nv20screen->nvws->channel->device; +	static char buffer[128]; + +	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); +	return buffer; +} + +static const char * +nv20_screen_get_vendor(struct pipe_screen *screen) +{ +	return "nouveau"; +} + +static int +nv20_screen_get_param(struct pipe_screen *screen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +		return 2; +	case PIPE_CAP_NPOT_TEXTURES: +		return 0; +	case PIPE_CAP_TWO_SIDED_STENCIL: +		return 0; +	case PIPE_CAP_GLSL: +		return 0; +	case PIPE_CAP_S3TC: +		return 0; +	case PIPE_CAP_ANISOTROPIC_FILTER: +		return 1; +	case PIPE_CAP_POINT_SPRITE: +		return 0; +	case PIPE_CAP_MAX_RENDER_TARGETS: +		return 1; +	case PIPE_CAP_OCCLUSION_QUERY: +		return 0; +	case PIPE_CAP_TEXTURE_SHADOW_MAP: +		return 0; +	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +		return 12; +	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +		return 0; +	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +		return 12; +	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +		return 0; +	case NOUVEAU_CAP_HW_VTXBUF: +	case NOUVEAU_CAP_HW_IDXBUF: +		return 0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0; +	} +} + +static float +nv20_screen_get_paramf(struct pipe_screen *screen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_LINE_WIDTH: +	case PIPE_CAP_MAX_LINE_WIDTH_AA: +		return 10.0; +	case PIPE_CAP_MAX_POINT_WIDTH: +	case PIPE_CAP_MAX_POINT_WIDTH_AA: +		return 64.0; +	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +		return 2.0; +	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +		return 4.0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0.0; +	} +} + +static boolean +nv20_screen_is_format_supported(struct pipe_screen *screen, +				enum pipe_format format, +				enum pipe_texture_target target, +				unsigned tex_usage, unsigned geom_flags) +{ +	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +			return TRUE; +		default: +			break; +		} +	} else { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_A1R5G5B5_UNORM: +		case PIPE_FORMAT_A4R4G4B4_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_L8_UNORM: +		case PIPE_FORMAT_A8_UNORM: +		case PIPE_FORMAT_I8_UNORM: +			return TRUE; +		default: +			break; +		} +	} + +	return FALSE; +} + +static void * +nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, +		 unsigned flags ) +{ +	struct pipe_winsys *ws = screen->winsys; +	void *map; +	struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; + +	map = ws->buffer_map(ws, nv20mt->buffer, flags); +	
if (!map) +		return NULL; + +	return map + surface->offset; +} + +static void +nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ +	struct pipe_winsys *ws = screen->winsys; +	struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; + +	ws->buffer_unmap(ws, nv20mt->buffer); +} + +static void +nv20_screen_destroy(struct pipe_screen *pscreen) +{ +	struct nv20_screen *screen = nv20_screen(pscreen); +	struct nouveau_winsys *nvws = screen->nvws; + +	nvws->notifier_free(&screen->sync); +	nvws->grobj_free(&screen->kelvin); + +	FREE(pscreen); +} + +static struct pipe_buffer * +nv20_surface_buffer(struct pipe_surface *surf) +{ +	struct nv20_miptree *mt = (struct nv20_miptree *)surf->texture; + +	return mt->buffer; +} + +struct pipe_screen * +nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ +	struct nv20_screen *screen = CALLOC_STRUCT(nv20_screen); +	unsigned kelvin_class = 0; +	unsigned chipset = nvws->channel->device->chipset; +	int ret; + +	if (!screen) +		return NULL; +	screen->nvws = nvws; + +	/* 2D engine setup */ +	screen->eng2d = nv04_surface_2d_init(nvws); +	screen->eng2d->buf = nv20_surface_buffer; + +	/* 3D object */ +	if (chipset >= 0x25) +		kelvin_class = NV25TCL; +	else if (chipset >= 0x20) +		kelvin_class = NV20TCL; + +	if (!kelvin_class || chipset >= 0x30) { +		NOUVEAU_ERR("Unknown nv2x chipset: nv%02x\n", chipset); +		return NULL; +	} + +	ret = nvws->grobj_alloc(nvws, kelvin_class, &screen->kelvin); +	if (ret) { +		NOUVEAU_ERR("Error creating 3D object: %d\n", ret); +		return FALSE; +	} + +	/* Notifier for sync purposes */ +	ret = nvws->notifier_alloc(nvws, 1, &screen->sync); +	if (ret) { +		NOUVEAU_ERR("Error creating notifier object: %d\n", ret); +		nv20_screen_destroy(&screen->pipe); +		return NULL; +	} + +	screen->pipe.winsys = ws; +	screen->pipe.destroy = nv20_screen_destroy; + +	screen->pipe.get_name = nv20_screen_get_name; +	screen->pipe.get_vendor = nv20_screen_get_vendor; +	screen->pipe.get_param = nv20_screen_get_param; +	screen->pipe.get_paramf = nv20_screen_get_paramf; + +	screen->pipe.is_format_supported = nv20_screen_is_format_supported; + +	screen->pipe.surface_map = nv20_surface_map; +	screen->pipe.surface_unmap = nv20_surface_unmap; + +	nv20_screen_init_miptree_functions(&screen->pipe); +	u_simple_screen_init(&screen->pipe); + +	return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv20/nv20_screen.h b/src/gallium/drivers/nv20/nv20_screen.h new file mode 100644 index 0000000000..bf2f2c0d9f --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_screen.h @@ -0,0 +1,24 @@ +#ifndef __NV20_SCREEN_H__ +#define __NV20_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv20_screen { +	struct pipe_screen pipe; + +	struct nouveau_winsys *nvws; + +	/* HW graphics objects */ +	struct nv04_surface_2d *eng2d; +	struct nouveau_grobj *kelvin; +	struct nouveau_notifier *sync; +}; + +static INLINE struct nv20_screen * +nv20_screen(struct pipe_screen *screen) +{ +	return (struct nv20_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c new file mode 100644 index 0000000000..ecec4f49a0 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -0,0 +1,582 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +static void * +nv20_blend_state_create(struct 
pipe_context *pipe, +			const struct pipe_blend_state *cso) +{ +	struct nv20_blend_state *cb; + +	cb = MALLOC(sizeof(struct nv20_blend_state)); + +	cb->b_enable = cso->blend_enable ? 1 : 0; +	cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | +			 (nvgl_blend_func(cso->rgb_src_factor))); +	cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | +			 (nvgl_blend_func(cso->rgb_dst_factor))); + +	cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | +		      ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | +		      ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | +		      ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + +	cb->d_enable = cso->dither ? 1 : 0; + +	return (void *)cb; +} + +static void +nv20_blend_state_bind(struct pipe_context *pipe, void *blend) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	nv20->blend = (struct nv20_blend_state*)blend; + +	nv20->dirty |= NV20_NEW_BLEND; +} + +static void +nv20_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { +	unsigned ret; + +	switch (wrap) { +	case PIPE_TEX_WRAP_REPEAT: +		ret = NV20TCL_TX_WRAP_S_REPEAT; +		break; +	case PIPE_TEX_WRAP_MIRROR_REPEAT: +		ret = NV20TCL_TX_WRAP_S_MIRRORED_REPEAT; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +		ret = NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +		ret = NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_CLAMP: +		ret = NV20TCL_TX_WRAP_S_CLAMP; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +	case PIPE_TEX_WRAP_MIRROR_CLAMP: +	default: +		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +		ret = NV20TCL_TX_WRAP_S_REPEAT; +		break; +	} + +	return (ret >> NV20TCL_TX_WRAP_S_SHIFT); +} + +static void * +nv20_sampler_state_create(struct pipe_context *pipe, +			  const struct pipe_sampler_state *cso) +{ +	struct nv20_sampler_state *ps; +	uint32_t filter = 0; + +	ps = MALLOC(sizeof(struct nv20_sampler_state)); + +	ps->wrap = ((wrap_mode(cso->wrap_s) << NV20TCL_TX_WRAP_S_SHIFT) | +		    (wrap_mode(cso->wrap_t) << NV20TCL_TX_WRAP_T_SHIFT)); + +	ps->en = 0; +	if (cso->max_anisotropy > 1.0) { +		/* no idea, binary driver sets it, works without it.. meh.. 
*/ +		ps->wrap |= (1 << 5); + +/*		if (cso->max_anisotropy >= 8.0) { +			ps->en |= NV20TCL_TX_ENABLE_ANISO_8X; +		} else +		if (cso->max_anisotropy >= 4.0) { +			ps->en |= NV20TCL_TX_ENABLE_ANISO_4X; +		} else { +			ps->en |= NV20TCL_TX_ENABLE_ANISO_2X; +		}*/ +	} + +	switch (cso->mag_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		filter |= NV20TCL_TX_FILTER_MAGNIFY_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		filter |= NV20TCL_TX_FILTER_MAGNIFY_NEAREST; +		break; +	} + +	switch (cso->min_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= +				NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; +			break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR; +			break; +		} +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= +				NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; +		break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= +				NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV20TCL_TX_FILTER_MINIFY_NEAREST; +			break; +		} +		break; +	} + +	ps->filt = filter; + +/*	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +		switch (cso->compare_func) { +		case PIPE_FUNC_NEVER: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER; +			break; +		case PIPE_FUNC_GREATER: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER; +			break; +		case PIPE_FUNC_EQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL; +			break; +		case PIPE_FUNC_GEQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL; +			break; +		case PIPE_FUNC_LESS: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS; +			break; +		case PIPE_FUNC_NOTEQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL; +			break; +		case PIPE_FUNC_LEQUAL: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL; +			break; +		case PIPE_FUNC_ALWAYS: +			ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS; +			break; +		default: +			break; +		} +	}*/ + +	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | +		    (float_to_ubyte(cso->border_color[0]) << 16) | +		    (float_to_ubyte(cso->border_color[1]) <<  8) | +		    (float_to_ubyte(cso->border_color[2]) <<  0)); + +	return (void *)ps; +} + +static void +nv20_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ +	struct nv20_context *nv20 = nv20_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv20->tex_sampler[unit] = sampler[unit]; +		nv20->dirty_samplers |= (1 << unit); +	} +} + +static void +nv20_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void +nv20_set_sampler_texture(struct pipe_context *pipe, unsigned nr, +			 struct pipe_texture **miptree) +{ +	struct nv20_context *nv20 = nv20_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv20->tex_miptree[unit] = (struct nv20_miptree *)miptree[unit]; +		nv20->dirty_samplers |= (1 << unit); +	} +} + +static void * +nv20_rasterizer_state_create(struct pipe_context *pipe, +			     const struct pipe_rasterizer_state *cso) +{ +	struct nv20_rasterizer_state *rs; +	int i; + +	/*XXX: ignored: +	 * 	light_twoside +	 * 	offset_cw/ccw -nohw +	 * 	scissor +	 * 	point_smooth -nohw +	 * 	multisample +	 * 	offset_units / offset_scale +	 */ +	rs = MALLOC(sizeof(struct nv20_rasterizer_state)); 
+ +	rs->templ = cso; +	 +	rs->shade_model = cso->flatshade ? NV20TCL_SHADE_MODEL_FLAT : +						NV20TCL_SHADE_MODEL_SMOOTH; + +	rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; +	rs->line_smooth_en = cso->line_smooth ? 1 : 0; + +	/* XXX: nv20 and nv25 different! */ +	rs->point_size = *(uint32_t*)&cso->point_size; + +	rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + +	if (cso->front_winding == PIPE_WINDING_CCW) { +		rs->front_face = NV20TCL_FRONT_FACE_CCW; +		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); +		rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_cw); +	} else { +		rs->front_face = NV20TCL_FRONT_FACE_CW; +		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); +		rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_ccw); +	} + +	switch (cso->cull_mode) { +	case PIPE_WINDING_CCW: +		rs->cull_face_en = 1; +		if (cso->front_winding == PIPE_WINDING_CCW) +			rs->cull_face    = NV20TCL_CULL_FACE_FRONT; +		else +			rs->cull_face    = NV20TCL_CULL_FACE_BACK; +		break; +	case PIPE_WINDING_CW: +		rs->cull_face_en = 1; +		if (cso->front_winding == PIPE_WINDING_CW) +			rs->cull_face    = NV20TCL_CULL_FACE_FRONT; +		else +			rs->cull_face    = NV20TCL_CULL_FACE_BACK; +		break; +	case PIPE_WINDING_BOTH: +		rs->cull_face_en = 1; +		rs->cull_face    = NV20TCL_CULL_FACE_FRONT_AND_BACK; +		break; +	case PIPE_WINDING_NONE: +	default: +		rs->cull_face_en = 0; +		rs->cull_face    = 0; +		break; +	} + +	if (cso->point_sprite) { +		rs->point_sprite = (1 << 0); +		for (i = 0; i < 8; i++) { +			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) +				rs->point_sprite |= (1 << (8 + i)); +		} +	} else { +		rs->point_sprite = 0; +	} + +	return (void *)rs; +} + +static void +nv20_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	nv20->rast = (struct nv20_rasterizer_state*)rast; + +	draw_set_rasterizer_state(nv20->draw, (nv20->rast ? nv20->rast->templ : NULL)); + +	nv20->dirty |= NV20_NEW_RAST; +} + +static void +nv20_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void * +nv20_depth_stencil_alpha_state_create(struct pipe_context *pipe, +			const struct pipe_depth_stencil_alpha_state *cso) +{ +	struct nv20_depth_stencil_alpha_state *hw; + +	hw = MALLOC(sizeof(struct nv20_depth_stencil_alpha_state)); + +	hw->depth.func		= nvgl_comparison_op(cso->depth.func); +	hw->depth.write_enable	= cso->depth.writemask ? 1 : 0; +	hw->depth.test_enable	= cso->depth.enabled ? 1 : 0; + +	hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0; +	hw->stencil.wmask = cso->stencil[0].writemask; +	hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func); +	hw->stencil.ref	= cso->stencil[0].ref_value; +	hw->stencil.vmask = cso->stencil[0].valuemask; +	hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op); +	hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op); +	hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op); + +	hw->alpha.enabled = cso->alpha.enabled ? 
1 : 0; +	hw->alpha.func = nvgl_comparison_op(cso->alpha.func); +	hw->alpha.ref  = float_to_ubyte(cso->alpha.ref_value); + +	return (void *)hw; +} + +static void +nv20_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	nv20->dsa = (struct nv20_depth_stencil_alpha_state*)dsa; + +	nv20->dirty |= NV20_NEW_DSA; +} + +static void +nv20_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void * +nv20_vp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *templ) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	return draw_create_vertex_shader(nv20->draw, templ); +} + +static void +nv20_vp_state_bind(struct pipe_context *pipe, void *shader) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	draw_bind_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader); + +	nv20->dirty |= NV20_NEW_VERTPROG; +} + +static void +nv20_vp_state_delete(struct pipe_context *pipe, void *shader) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	draw_delete_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv20_fp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv20_fragment_program *fp; + +	fp = CALLOC(1, sizeof(struct nv20_fragment_program)); +	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); +	 +	tgsi_scan_shader(cso->tokens, &fp->info); + +	return (void *)fp; +} + +static void +nv20_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv20_context *nv20 = nv20_context(pipe); +	struct nv20_fragment_program *fp = hwcso; + +	nv20->fragprog.current = fp; +	nv20->dirty |= NV20_NEW_FRAGPROG; +} + +static void +nv20_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv20_context *nv20 = nv20_context(pipe); +	struct nv20_fragment_program *fp = hwcso; + +	nv20_fragprog_destroy(nv20, fp); +	FREE((void*)fp->pipe.tokens); +	FREE(fp); +} + +static void +nv20_set_blend_color(struct pipe_context *pipe, +		     const struct pipe_blend_color *bcol) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	nv20->blend_color = (struct pipe_blend_color*)bcol; + +	nv20->dirty |= NV20_NEW_BLENDCOL; +} + +static void +nv20_set_clip_state(struct pipe_context *pipe, +		    const struct pipe_clip_state *clip) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	draw_set_clip_state(nv20->draw, clip); +} + +static void +nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +			 const struct pipe_constant_buffer *buf ) +{ +	struct nv20_context *nv20 = nv20_context(pipe); +	struct pipe_winsys *ws = pipe->winsys; + +	assert(shader < PIPE_SHADER_TYPES); +	assert(index == 0); + +	if (buf) { +		void *mapped; +		if (buf->buffer && buf->buffer->size && +                    (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) +		{ +			memcpy(nv20->constbuf[shader], mapped, buf->buffer->size); +			nv20->constbuf_nr[shader] = +				buf->buffer->size / (4 * sizeof(float)); +			ws->buffer_unmap(ws, buf->buffer); +		} +	} +} + +static void +nv20_set_framebuffer_state(struct pipe_context *pipe, +			   const struct pipe_framebuffer_state *fb) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	nv20->framebuffer = (struct pipe_framebuffer_state*)fb; + +	nv20->dirty |= NV20_NEW_FRAMEBUFFER; +} + +static void +nv20_set_polygon_stipple(struct pipe_context *pipe, +			 const struct pipe_poly_stipple *stipple) +{ +	
NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv20_set_scissor_state(struct pipe_context *pipe, +		       const struct pipe_scissor_state *s) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	nv20->scissor = (struct pipe_scissor_state*)s; + +	nv20->dirty |= NV20_NEW_SCISSOR; +} + +static void +nv20_set_viewport_state(struct pipe_context *pipe, +			const struct pipe_viewport_state *vpt) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	nv20->viewport = (struct pipe_viewport_state*)vpt; + +	draw_set_viewport_state(nv20->draw, nv20->viewport); + +	nv20->dirty |= NV20_NEW_VIEWPORT; +} + +static void +nv20_set_vertex_buffers(struct pipe_context *pipe, unsigned count, +			const struct pipe_vertex_buffer *vb) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	memcpy(nv20->vtxbuf, vb, sizeof(*vb) * count); +	nv20->dirty |= NV20_NEW_VTXARRAYS; + +	draw_set_vertex_buffers(nv20->draw, count, vb); +} + +static void +nv20_set_vertex_elements(struct pipe_context *pipe, unsigned count, +			 const struct pipe_vertex_element *ve) +{ +	struct nv20_context *nv20 = nv20_context(pipe); + +	memcpy(nv20->vtxelt, ve, sizeof(*ve) * count); +	nv20->dirty |= NV20_NEW_VTXARRAYS; + +	draw_set_vertex_elements(nv20->draw, count, ve); +} + +void +nv20_init_state_functions(struct nv20_context *nv20) +{ +	nv20->pipe.create_blend_state = nv20_blend_state_create; +	nv20->pipe.bind_blend_state = nv20_blend_state_bind; +	nv20->pipe.delete_blend_state = nv20_blend_state_delete; + +	nv20->pipe.create_sampler_state = nv20_sampler_state_create; +	nv20->pipe.bind_sampler_states = nv20_sampler_state_bind; +	nv20->pipe.delete_sampler_state = nv20_sampler_state_delete; +	nv20->pipe.set_sampler_textures = nv20_set_sampler_texture; + +	nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create; +	nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind; +	nv20->pipe.delete_rasterizer_state = nv20_rasterizer_state_delete; + +	nv20->pipe.create_depth_stencil_alpha_state = +		nv20_depth_stencil_alpha_state_create; +	nv20->pipe.bind_depth_stencil_alpha_state = +		nv20_depth_stencil_alpha_state_bind; +	nv20->pipe.delete_depth_stencil_alpha_state = +		nv20_depth_stencil_alpha_state_delete; + +	nv20->pipe.create_vs_state = nv20_vp_state_create; +	nv20->pipe.bind_vs_state = nv20_vp_state_bind; +	nv20->pipe.delete_vs_state = nv20_vp_state_delete; + +	nv20->pipe.create_fs_state = nv20_fp_state_create; +	nv20->pipe.bind_fs_state = nv20_fp_state_bind; +	nv20->pipe.delete_fs_state = nv20_fp_state_delete; + +	nv20->pipe.set_blend_color = nv20_set_blend_color; +	nv20->pipe.set_clip_state = nv20_set_clip_state; +	nv20->pipe.set_constant_buffer = nv20_set_constant_buffer; +	nv20->pipe.set_framebuffer_state = nv20_set_framebuffer_state; +	nv20->pipe.set_polygon_stipple = nv20_set_polygon_stipple; +	nv20->pipe.set_scissor_state = nv20_set_scissor_state; +	nv20->pipe.set_viewport_state = nv20_set_viewport_state; + +	nv20->pipe.set_vertex_buffers = nv20_set_vertex_buffers; +	nv20->pipe.set_vertex_elements = nv20_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h new file mode 100644 index 0000000000..34f402fdcb --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state.h @@ -0,0 +1,139 @@ +#ifndef __NV20_STATE_H__ +#define __NV20_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv20_blend_state { +	uint32_t b_enable; +	uint32_t b_srcfunc; +	uint32_t b_dstfunc; + +	uint32_t c_mask; + +	uint32_t d_enable; +}; + +struct 
nv20_sampler_state { +	uint32_t wrap; +	uint32_t en; +	uint32_t filt; +	uint32_t bcol; +}; + +struct nv20_rasterizer_state { +	uint32_t shade_model; + +	uint32_t line_width; +	uint32_t line_smooth_en; + +	uint32_t point_size; + +	uint32_t poly_smooth_en; +	 +	uint32_t poly_mode_front; +	uint32_t poly_mode_back; + +	uint32_t front_face; +	uint32_t cull_face; +	uint32_t cull_face_en; + +	uint32_t point_sprite; + +	const struct pipe_rasterizer_state *templ; +}; + +struct nv20_vertex_program_exec { +	uint32_t data[4]; +	boolean has_branch_offset; +	int const_index; +}; + +struct nv20_vertex_program_data { +	int index; /* immediates == -1 */ +	float value[4]; +}; + +struct nv20_vertex_program { +	const struct pipe_shader_state *pipe; + +	boolean translated; +	struct nv20_vertex_program_exec *insns; +	unsigned nr_insns; +	struct nv20_vertex_program_data *consts; +	unsigned nr_consts; + +	struct nouveau_resource *exec; +	unsigned exec_start; +	struct nouveau_resource *data; +	unsigned data_start; +	unsigned data_start_min; + +	uint32_t ir; +	uint32_t or; +}; + +struct nv20_fragment_program_data { +	unsigned offset; +	unsigned index; +}; + +struct nv20_fragment_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; + +	boolean translated; +	boolean on_hw; +	unsigned samplers; + +	uint32_t *insn; +	int       insn_len; + +	struct nv20_fragment_program_data *consts; +	unsigned nr_consts; + +	struct pipe_buffer *buffer; + +	uint32_t fp_control; +	uint32_t fp_reg_control; +}; + + +struct nv20_depth_stencil_alpha_state { +	struct { +		uint32_t func; +		uint32_t write_enable; +		uint32_t test_enable; +	} depth; + +	struct { +		uint32_t enable; +		uint32_t wmask; +		uint32_t func; +		uint32_t ref; +		uint32_t vmask; +		uint32_t fail; +		uint32_t zfail; +		uint32_t zpass; +	} stencil; + +	struct { +		uint32_t enabled; +		uint32_t func; +		uint32_t ref; +	} alpha; +}; + +struct nv20_miptree { +	struct pipe_texture base; + +	struct pipe_buffer *buffer; +	uint total_size; + +	struct { +		uint pitch; +		uint *image_offset; +	} level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c new file mode 100644 index 0000000000..0f4df9ca31 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -0,0 +1,396 @@ +#include "nv20_context.h" +#include "nv20_state.h" +#include "draw/draw_context.h" + +static void nv20_state_emit_blend(struct nv20_context* nv20) +{ +	struct nv20_blend_state *b = nv20->blend; + +	BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); +	OUT_RING  (b->d_enable); + +	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); +	OUT_RING  (b->b_enable); + +	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); +	OUT_RING  (b->b_srcfunc); +	OUT_RING  (b->b_dstfunc); + +	BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); +	OUT_RING  (b->c_mask); +} + +static void nv20_state_emit_blend_color(struct nv20_context* nv20) +{ +	struct pipe_blend_color *c = nv20->blend_color; + +	BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); +	OUT_RING  ((float_to_ubyte(c->color[3]) << 24)| +		   (float_to_ubyte(c->color[0]) << 16)| +		   (float_to_ubyte(c->color[1]) << 8) | +		   (float_to_ubyte(c->color[2]) << 0)); +} + +static void nv20_state_emit_rast(struct nv20_context* nv20) +{ +	struct nv20_rasterizer_state *r = nv20->rast; + +	BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); +	OUT_RING  (r->shade_model); +	OUT_RING  (r->line_width); + + +	BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); +	OUT_RING  (r->point_size); + +	BEGIN_RING(kelvin, 
NV20TCL_POLYGON_MODE_FRONT, 2); +	OUT_RING  (r->poly_mode_front); +	OUT_RING  (r->poly_mode_back); + + +	BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); +	OUT_RING  (r->cull_face); +	OUT_RING  (r->front_face); + +	BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); +	OUT_RING  (r->line_smooth_en); +	OUT_RING  (r->poly_smooth_en); + +	BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); +	OUT_RING  (r->cull_face_en); +} + +static void nv20_state_emit_dsa(struct nv20_context* nv20) +{ +	struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + +	BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); +	OUT_RING (d->depth.func); + +	BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); +	OUT_RING (d->depth.write_enable); + +	BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); +	OUT_RING (d->depth.test_enable); + +	BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); +	OUT_RING (1); + +#if 0 +	BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); +	OUT_RING (d->stencil.enable); +	BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); +	OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); +#endif + +	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); +	OUT_RING (d->alpha.enabled); + +	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); +	OUT_RING (d->alpha.func); + +	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); +	OUT_RING (d->alpha.ref); +} + +static void nv20_state_emit_viewport(struct nv20_context* nv20) +{ +} + +static void nv20_state_emit_scissor(struct nv20_context* nv20) +{ +	/* NV20TCL_SCISSOR_* is probably a software method */ +/*	struct pipe_scissor_state *s = nv20->scissor; +	BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); +	OUT_RING  (((s->maxx - s->minx) << 16) | s->minx); +	OUT_RING  (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void nv20_state_emit_framebuffer(struct nv20_context* nv20) +{ +	struct pipe_framebuffer_state* fb = nv20->framebuffer; +	struct pipe_surface *rt, *zeta = NULL; +	uint32_t rt_format, w, h; +	int colour_format = 0, zeta_format = 0; +	struct nv20_miptree *nv20mt = 0; + +	w = fb->cbufs[0]->width; +	h = fb->cbufs[0]->height; +	colour_format = fb->cbufs[0]->format; +	rt = fb->cbufs[0]; + +	if (fb->zsbuf) { +		if (colour_format) { +			assert(w == fb->zsbuf->width); +			assert(h == fb->zsbuf->height); +		} else { +			w = fb->zsbuf->width; +			h = fb->zsbuf->height; +		} + +		zeta_format = fb->zsbuf->format; +		zeta = fb->zsbuf; +	} + +	rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20; + +	switch (colour_format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case 0: +		rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8; +		break; +	case PIPE_FORMAT_R5G6B5_UNORM: +		rt_format |= NV20TCL_RT_FORMAT_COLOR_R5G6B5; +		break; +	default: +		assert(0); +	} + +	if (zeta) { +		BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); +		OUT_RING  (rt->stride | (zeta->stride << 16)); +	} else { +		BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); +		OUT_RING  (rt->stride | (rt->stride << 16)); +	} + +	nv20mt = (struct nv20_miptree *)rt->texture; +	nv20->rt[0] = nv20mt->buffer; + +	if (zeta_format) +	{ +		nv20mt = (struct nv20_miptree *)zeta->texture; +		nv20->zeta = nv20mt->buffer; +	} + +	BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); +	OUT_RING  ((w << 16) | 0); +	OUT_RING  ((h << 16) | 0); /*NV20TCL_RT_VERT */ +	OUT_RING  (rt_format); /* NV20TCL_RT_FORMAT */ +	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); +	OUT_RING  (((w - 1) << 16) | 0); +	OUT_RING  (((h - 1) << 16) | 0); +} + +static void nv20_vertex_layout(struct nv20_context *nv20) +{ +	struct nv20_fragment_program *fp = nv20->fragprog.current; +	struct draw_context *dc = nv20->draw; +	int src; +	int i; +	
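+	/* Vertex layout for the draw (swtnl) module, rebuilt from the
+	 * inputs the bound fragment program actually reads. */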
struct vertex_info *vinfo = &nv20->vertex_info; +	const enum interp_mode colorInterp = INTERP_LINEAR; +	boolean colors[2] = { FALSE }; +	boolean generics[12] = { FALSE }; +	boolean fog = FALSE; + +	memset(vinfo, 0, sizeof(*vinfo)); + +	/* +	 * Assumed NV20 hardware vertex attribute order: +	 * 0 position, 1 ?, 2 ?, 3 col0, +	 * 4 col1?, 5 ?, 6 ?, 7 ?, +	 * 8 ?, 9 tex0, 10 tex1, 11 tex2, +	 * 12 tex3, 13 ?, 14 ?, 15 ? +	 * unaccounted: wgh, nor, fog +	 * There are total 16 attrs. +	 * vinfo->hwfmt[0] has a used-bit corresponding to each of these. +	 * relation to TGSI_SEMANTIC_*: +	 * - POSITION: position (always used) +	 * - COLOR: col1, col0 +	 * - GENERIC: tex3, tex2, tex1, tex0, normal, weight +	 * - FOG: fog +	 */ + +	for (i = 0; i < fp->info.num_inputs; i++) { +		int isn = fp->info.input_semantic_name[i]; +		int isi = fp->info.input_semantic_index[i]; +		switch (isn) { +		case TGSI_SEMANTIC_POSITION: +			break; +		case TGSI_SEMANTIC_COLOR: +			assert(isi < 2); +			colors[isi] = TRUE; +			break; +		case TGSI_SEMANTIC_GENERIC: +			assert(isi < 12); +			generics[isi] = TRUE; +			break; +		case TGSI_SEMANTIC_FOG: +			fog = TRUE; +			break; +		default: +			assert(0 && "unknown input_semantic_name"); +		} +	} + +	/* always do position */ { +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); +		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); +		vinfo->hwfmt[0] |= (1 << 0); +	} + +	/* two unnamed generics */ +	for (i = 4; i < 6; i++) { +		if (!generics[i]) +			continue; +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); +		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +		vinfo->hwfmt[0] |= (1 << (i - 3)); +	} + +	if (colors[0]) { +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); +		draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); +		vinfo->hwfmt[0] |= (1 << 3); +	} + +	if (colors[1]) { +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); +		draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); +		vinfo->hwfmt[0] |= (1 << 4); +	} + +	/* four unnamed generics */ +	for (i = 6; i < 10; i++) { +		if (!generics[i]) +			continue; +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); +		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +		vinfo->hwfmt[0] |= (1 << (i - 1)); +	} + +	/* tex0, tex1, tex2, tex3 */ +	for (i = 0; i < 4; i++) { +		if (!generics[i]) +			continue; +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); +		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +		vinfo->hwfmt[0] |= (1 << (i + 9)); +	} + +	/* two unnamed generics */ +	for (i = 10; i < 12; i++) { +		if (!generics[i]) +			continue; +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); +		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +		vinfo->hwfmt[0] |= (1 << (i + 3)); +	} + +	if (fog) { +		src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); +		draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); +		vinfo->hwfmt[0] |= (1 << 15); +	} + +	draw_compute_vertex_size(vinfo); +} + +void +nv20_emit_hw_state(struct nv20_context *nv20) +{ +	int i; + +	if (nv20->dirty & NV20_NEW_VERTPROG) { +		//nv20_vertprog_bind(nv20, nv20->vertprog.current); +		nv20->dirty &= ~NV20_NEW_VERTPROG; +	} + +	if (nv20->dirty & NV20_NEW_FRAGPROG) { +		nv20_fragprog_bind(nv20, nv20->fragprog.current); +		/*XXX: clear NV20_NEW_FRAGPROG if no new program uploaded */ +		nv20->dirty_samplers |= (1<<10); +		nv20->dirty_samplers = 0; +	} + +	if (nv20->dirty_samplers || (nv20->dirty & NV20_NEW_FRAGPROG)) { +		
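+		/* Texture state depends on which samplers the fragment
+		 * program references, so it is re-emitted when either changes. */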
nv20_fragtex_bind(nv20); +		nv20->dirty &= ~NV20_NEW_FRAGPROG; +	} + +	if (nv20->dirty & NV20_NEW_VTXARRAYS) { +		nv20->dirty &= ~NV20_NEW_VTXARRAYS; +		nv20_vertex_layout(nv20); +		nv20_vtxbuf_bind(nv20); +	} + +	if (nv20->dirty & NV20_NEW_BLEND) { +		nv20->dirty &= ~NV20_NEW_BLEND; +		nv20_state_emit_blend(nv20); +	} + +	if (nv20->dirty & NV20_NEW_BLENDCOL) { +		nv20->dirty &= ~NV20_NEW_BLENDCOL; +		nv20_state_emit_blend_color(nv20); +	} + +	if (nv20->dirty & NV20_NEW_RAST) { +		nv20->dirty &= ~NV20_NEW_RAST; +		nv20_state_emit_rast(nv20); +	} + +	if (nv20->dirty & NV20_NEW_DSA) { +		nv20->dirty &= ~NV20_NEW_DSA; +		nv20_state_emit_dsa(nv20); +	} + + 	if (nv20->dirty & NV20_NEW_VIEWPORT) { +		nv20->dirty &= ~NV20_NEW_VIEWPORT; +		nv20_state_emit_viewport(nv20); +	} + + 	if (nv20->dirty & NV20_NEW_SCISSOR) { +		nv20->dirty &= ~NV20_NEW_SCISSOR; +		nv20_state_emit_scissor(nv20); +	} + + 	if (nv20->dirty & NV20_NEW_FRAMEBUFFER) { +		nv20->dirty &= ~NV20_NEW_FRAMEBUFFER; +		nv20_state_emit_framebuffer(nv20); +	} + +	/* Emit relocs for every referenced buffer. +	 * This is to ensure the bufmgr has an accurate idea of how +	 * the buffer is used.  This isn't very efficient, but we don't +	 * seem to take a significant performance hit.  Will be improved +	 * at some point.  Vertex arrays are emitted by nv20_vbo.c +	 */ + +	/* Render target */ +	BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1); +	OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); +	OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	if (nv20->zeta) { +		BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); +		OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +		BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); +		OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +		/* XXX for when we allocate LMA on nv17 */ +/*		BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); +		OUT_RELOCl(nv20->zeta + lma_offset);*/ +	} + +	/* Vertex buffer */ +	BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); +	OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); +	OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + +	/* Texture images */ +	for (i = 0; i < 2; i++) { +		if (!(nv20->fp_samplers & (1 << i))) +			continue; +		BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); +		OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | +			   NOUVEAU_BO_GART | NOUVEAU_BO_RD); +		BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1); +		OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, +			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | +			   NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, +			   NV20TCL_TX_FORMAT_DMA1); +	} +} + diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c new file mode 100644 index 0000000000..6cd607583c --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "nv20_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv20_surface_copy(struct pipe_context *pipe, boolean do_flip, +		  struct pipe_surface *dest, unsigned destx, unsigned desty, +		  struct pipe_surface *src, unsigned srcx, unsigned srcy, +		  unsigned width, unsigned height) +{ +	struct nv20_context *nv20 = nv20_context(pipe); +	struct nv04_surface_2d *eng2d = nv20->screen->eng2d; + +	if (do_flip) { +		desty += height; +		while (height--) { +			eng2d->copy(eng2d, dest, destx, desty--, src, +				    srcx, srcy++, width, 1); +		} +		return; +	} + +	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv20_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +		  unsigned destx, unsigned desty, unsigned width, +		  unsigned height, unsigned value) +{ +	struct nv20_context *nv20 = nv20_context(pipe); +	struct nv04_surface_2d *eng2d = nv20->screen->eng2d; + +	eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv20_init_surface_functions(struct nv20_context *nv20) +{ +	nv20->pipe.surface_copy = nv20_surface_copy; +	nv20->pipe.surface_fill = nv20_surface_fill; +} diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c new file mode 100644 index 0000000000..24d8f4bef0 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -0,0 +1,78 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv20_draw_elements( struct pipe_context *pipe, +                    struct pipe_buffer *indexBuffer, +                    unsigned indexSize, +                    unsigned prim, unsigned start, unsigned count) +{ +	struct nv20_context *nv20 = nv20_context( pipe ); +	struct draw_context *draw = nv20->draw; +	unsigned i; + +	nv20_emit_hw_state(nv20); + +	/* +	 * Map vertex buffers +	 */ +	for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { +		if (nv20->vtxbuf[i].buffer) { +			void *buf +				= pipe->winsys->buffer_map(pipe->winsys, +						nv20->vtxbuf[i].buffer, +						PIPE_BUFFER_USAGE_CPU_READ); +			
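+			/* Hand the CPU-mapped vertex data to the draw module,
+			 * which performs vertex processing in software. */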
draw_set_mapped_vertex_buffer(draw, i, buf); +		} +	} +	/* Map index buffer, if present */ +	if (indexBuffer) { +		void *mapped_indexes +			= pipe->winsys->buffer_map(pipe->winsys, indexBuffer, +					PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); +	} +	else { +		/* no index/element buffer */ +		draw_set_mapped_element_buffer(draw, 0, NULL); +	} + +	draw_set_mapped_constant_buffer(draw, +					nv20->constbuf[PIPE_SHADER_VERTEX], +					nv20->constbuf_nr[PIPE_SHADER_VERTEX]); + +	/* draw! */ +	draw_arrays(nv20->draw, prim, start, count); + +	/* +	 * unmap vertex/index buffers +	 */ +	for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { +		if (nv20->vtxbuf[i].buffer) { +			pipe->winsys->buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer); +			draw_set_mapped_vertex_buffer(draw, i, NULL); +		} +	} +	if (indexBuffer) { +		pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); +		draw_set_mapped_element_buffer(draw, 0, NULL); +	} + +	draw_flush(nv20->draw); +	return TRUE; +} + +boolean nv20_draw_arrays( struct pipe_context *pipe, +				 unsigned prim, unsigned start, unsigned count) +{ +	return nv20_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c new file mode 100644 index 0000000000..5db0e807ff --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -0,0 +1,838 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +/* TODO (at least...): + *  1. Indexed consts  + ARL + *  2. Arb. swz/negation + *  3. NV_vp11, NV_vp2, NV_vp3 features + *       - extra arith opcodes + *       - branching + *       - texture sampling + *       - indexed attribs + *       - indexed results + *  4. 
bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv20_shader.h" + +#define swz(s,x,y,z,w) nv20_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv20_sr_neg((s)) +#define abs(s) nv20_sr_abs((s)) + +struct nv20_vpc { +	struct nv20_vertex_program *vp; + +	struct nv20_vertex_program_exec *vpi; + +	unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + +	int high_temp; +	int temp_temp_count; + +	struct nv20_sreg *imm; +	unsigned nr_imm; +}; + +static struct nv20_sreg +temp(struct nv20_vpc *vpc) +{ +	int idx; + +	idx  = vpc->temp_temp_count++; +	idx += vpc->high_temp + 1; +	return nv20_sr(NV30SR_TEMP, idx); +} + +static struct nv20_sreg +constant(struct nv20_vpc *vpc, int pipe, float x, float y, float z, float w) +{ +	struct nv20_vertex_program *vp = vpc->vp; +	struct nv20_vertex_program_data *vpd; +	int idx; + +	if (pipe >= 0) { +		for (idx = 0; idx < vp->nr_consts; idx++) { +			if (vp->consts[idx].index == pipe) +				return nv20_sr(NV30SR_CONST, idx); +		} +	} + +	idx = vp->nr_consts++; +	vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); +	vpd = &vp->consts[idx]; + +	vpd->index = pipe; +	vpd->value[0] = x; +	vpd->value[1] = y; +	vpd->value[2] = z; +	vpd->value[3] = w; +	return nv20_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ +	nv20_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv20_vpc *vpc, uint32_t *hw, int pos, struct nv20_sreg src) +{ +	struct nv20_vertex_program *vp = vpc->vp; +	uint32_t sr = 0; + +	switch (src.type) { +	case NV30SR_TEMP: +		sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); +		sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); +		break; +	case NV30SR_INPUT: +		sr |= (NV30_VP_SRC_REG_TYPE_INPUT << +		       NV30_VP_SRC_REG_TYPE_SHIFT); +		vp->ir |= (1 << src.index); +		hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); +		break; +	case NV30SR_CONST: +		sr |= (NV30_VP_SRC_REG_TYPE_CONST << +		       NV30_VP_SRC_REG_TYPE_SHIFT); +		assert(vpc->vpi->const_index == -1 || +		       vpc->vpi->const_index == src.index); +		vpc->vpi->const_index = src.index; +		break; +	case NV30SR_NONE: +		sr |= (NV30_VP_SRC_REG_TYPE_INPUT << +		       NV30_VP_SRC_REG_TYPE_SHIFT); +		break; +	default: +		assert(0); +	} + +	if (src.negate) +		sr |= NV30_VP_SRC_NEGATE; + +	if (src.abs) +		hw[0] |= (1 << (21 + pos)); + +	sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | +	       (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | +	       (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | +	       (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); + +/* + * |VVV| + * d�.�b + *  \u/ + * + */ + +	switch (pos) { +	case 0: +		hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> +			  NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; +		hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << +			  NV30_VP_INST_SRC0L_SHIFT; +		break; +	case 1: +		hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; +		break; +	case 2: +		hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> +			  NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; +		hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << +			  NV30_VP_INST_SRC2L_SHIFT; +		break; +	default: +		assert(0); +	} +} + +static void +emit_dst(struct nv20_vpc *vpc, uint32_t *hw, int slot, struct nv20_sreg dst) +{ +	struct nv20_vertex_program *vp = vpc->vp; + +	switch (dst.type) { +	case NV30SR_TEMP: +		hw[0] |= (dst.index << 
NV30_VP_INST_DEST_TEMP_ID_SHIFT); +		break; +	case NV30SR_OUTPUT: +		switch (dst.index) { +		case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; +		case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; +		case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; +		case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; +		case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; +		case NV30_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break; +		case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; +		case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; +		case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; +		case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; +		case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; +		case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; +		case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; +		case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; +		default: +			break; +		} + +		hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); +		hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + +		/*XXX: no way this is entirely correct, someone needs to +		 *     figure out what exactly it is. +		 */ +		hw[3] |= 0x800; +		break; +	default: +		assert(0); +	} +} + +static void +nv20_vp_arith(struct nv20_vpc *vpc, int slot, int op, +	      struct nv20_sreg dst, int mask, +	      struct nv20_sreg s0, struct nv20_sreg s1, +	      struct nv20_sreg s2) +{ +	struct nv20_vertex_program *vp = vpc->vp; +	uint32_t *hw; + +	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); +	vpc->vpi = &vp->insns[vp->nr_insns - 1]; +	memset(vpc->vpi, 0, sizeof(*vpc->vpi)); +	vpc->vpi->const_index = -1; + +	hw = vpc->vpi->data; + +	hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); +	hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | +		  (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | +		  (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | +		  (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); + +	hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +//	hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; +//	hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + +	if (dst.type == NV30SR_OUTPUT) { +		if (slot) +			hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); +		else +			hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); +	} else { +		if (slot) +			hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); +		else +			hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); +	} + +	emit_dst(vpc, hw, slot, dst); +	emit_src(vpc, hw, 0, s0); +	emit_src(vpc, hw, 1, s1); +	emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv20_sreg +tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) { +	struct nv20_sreg src; + +	switch (fsrc->SrcRegister.File) { +	case TGSI_FILE_INPUT: +		src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); +		break; +	case TGSI_FILE_CONSTANT: +		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); +		break; +	case TGSI_FILE_IMMEDIATE: +		src = vpc->imm[fsrc->SrcRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		if (vpc->high_temp < fsrc->SrcRegister.Index) +			vpc->high_temp = fsrc->SrcRegister.Index; +		src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); +		break; +	default: +		NOUVEAU_ERR("bad src file\n"); +		break; +	} + +	src.abs = fsrc->SrcRegisterExtMod.Absolute; +	src.negate = fsrc->SrcRegister.Negate; +	src.swz[0] = fsrc->SrcRegister.SwizzleX; +	src.swz[1] = fsrc->SrcRegister.SwizzleY; +	src.swz[2] = fsrc->SrcRegister.SwizzleZ; +	src.swz[3] = fsrc->SrcRegister.SwizzleW; +	return src; +} + +static INLINE 
struct nv20_sreg +tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) { +	struct nv20_sreg dst; + +	switch (fdst->DstRegister.File) { +	case TGSI_FILE_OUTPUT: +		dst = nv20_sr(NV30SR_OUTPUT, +			      vpc->output_map[fdst->DstRegister.Index]); + +		break; +	case TGSI_FILE_TEMPORARY: +		dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index); +		if (vpc->high_temp < dst.index) +			vpc->high_temp = dst.index; +		break; +	default: +		NOUVEAU_ERR("bad dst file\n"); +		break; +	} + +	return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ +	int mask = 0; + +	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; +	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; +	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; +	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; +	return mask; +} + +static boolean +nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, +				const struct tgsi_full_instruction *finst) +{ +	struct nv20_sreg src[3], dst, tmp; +	struct nv20_sreg none = nv20_sr(NV30SR_NONE, 0); +	int mask; +	int ai = -1, ci = -1; +	int i; + +	if (finst->Instruction.Opcode == TGSI_OPCODE_END) +		return TRUE; + +	vpc->temp_temp_count = 0; +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { +			src[i] = tgsi_src(vpc, fsrc); +		} +	} + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			if (ai == -1 || ai == fsrc->SrcRegister.Index) { +				ai = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		/*XXX: index comparison is broken now that consts come from +		 *     two different register files. 
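+		 *     A CONSTANT and an IMMEDIATE with the same index are
+		 *     treated as the same source here.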
+		 */ +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_IMMEDIATE: +			if (ci == -1 || ci == fsrc->SrcRegister.Index) { +				ci = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_TEMPORARY: +			/* handled above */ +			break; +		default: +			NOUVEAU_ERR("bad src file\n"); +			return FALSE; +		} +	} + +	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]); +	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + +	switch (finst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); +		break; +	case TGSI_OPCODE_ADD: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); +		break; +	case TGSI_OPCODE_ARL: +		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_DP3: +		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DP4: +		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DPH: +		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DST: +		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_EX2: +		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_EXP: +		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_FLR: +		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FRC: +		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_LG2: +		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LIT: +		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LOG: +		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_MAD: +		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); +		break; +	case TGSI_OPCODE_MAX: +		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MIN: +		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MOV: +		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_MUL: +		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_POW: +		tmp = temp(vpc); +		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, +		      swz(src[0], X, X, X, X)); +		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		      swz(src[1], X, X, X, X), none); +		arith(vpc, 1, OP_EX2, dst, mask, none, none, +		      swz(tmp, X, X, X, X)); +		break; +	case TGSI_OPCODE_RCP: +		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_RET: +		break; +	case TGSI_OPCODE_RSQ: +		arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_SGE: +		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SGT: +		arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SLT: +		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SUB: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); +		break; +	case TGSI_OPCODE_XPD: +		tmp = temp(vpc); +		arith(vpc, 0, OP_MUL, tmp, mask, +		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); +		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), +		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, 
Y), +		      neg(tmp)); +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); +		return FALSE; +	} + +	return TRUE; +} + +static boolean +nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, +				const struct tgsi_full_declaration *fdec) +{ +	int hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = NV30_VP_INST_DEST_POS; +		break; +	case TGSI_SEMANTIC_COLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV30_VP_INST_DEST_COL0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV30_VP_INST_DEST_COL1; +		} else { +			NOUVEAU_ERR("bad colour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_BCOLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV30_VP_INST_DEST_BFC0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV30_VP_INST_DEST_BFC1; +		} else { +			NOUVEAU_ERR("bad bcolour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_FOG: +		hw = NV30_VP_INST_DEST_FOGC; +		break; +	case TGSI_SEMANTIC_PSIZE: +		hw = NV30_VP_INST_DEST_PSZ; +		break; +	case TGSI_SEMANTIC_GENERIC: +		if (fdec->Semantic.SemanticIndex <= 7) { +			hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); +		} else { +			NOUVEAU_ERR("bad generic semantic index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad output semantic\n"); +		return FALSE; +	} + +	vpc->output_map[fdec->DeclarationRange.First] = hw; +	return TRUE; +} + +static boolean +nv20_vertprog_prepare(struct nv20_vpc *vpc) +{ +	struct tgsi_parse_context p; +	int nr_imm = 0; + +	tgsi_parse_init(&p, vpc->vp->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch(tok->Token.Type) { +		case TGSI_TOKEN_TYPE_IMMEDIATE: +			nr_imm++; +			break; +		default: +			break; +		} +	} +	tgsi_parse_free(&p); + +	if (nr_imm) { +		vpc->imm = CALLOC(nr_imm, sizeof(struct nv20_sreg)); +		assert(vpc->imm); +	} + +	return TRUE; +} + +static void +nv20_vertprog_translate(struct nv20_context *nv20, +			struct nv20_vertex_program *vp) +{ +	struct tgsi_parse_context parse; +	struct nv20_vpc *vpc = NULL; + +	tgsi_dump(vp->pipe.tokens,0); + +	vpc = CALLOC(1, sizeof(struct nv20_vpc)); +	if (!vpc) +		return; +	vpc->vp = vp; +	vpc->high_temp = -1; + +	if (!nv20_vertprog_prepare(vpc)) { +		FREE(vpc); +		return; +	} + +	tgsi_parse_init(&parse, vp->pipe.tokens); + +	while (!tgsi_parse_end_of_tokens(&parse)) { +		tgsi_parse_token(&parse); + +		switch (parse.FullToken.Token.Type) { +		case TGSI_TOKEN_TYPE_DECLARATION: +		{ +			const struct tgsi_full_declaration *fdec; +			fdec = &parse.FullToken.FullDeclaration; +			switch (fdec->Declaration.File) { +			case TGSI_FILE_OUTPUT: +				if (!nv20_vertprog_parse_decl_output(vpc, fdec)) +					goto out_err; +				break; +			default: +				break; +			} +		} +			break; +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			const struct tgsi_full_immediate *imm; + +			imm = &parse.FullToken.FullImmediate; +			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +			assert(imm->Immediate.NrTokens == 4 + 1); +			vpc->imm[vpc->nr_imm++] = +				constant(vpc, -1, +					 imm->u.ImmediateFloat32[0].Float, +					 imm->u.ImmediateFloat32[1].Float, +					 imm->u.ImmediateFloat32[2].Float, +					 imm->u.ImmediateFloat32[3].Float); +		} +			break; +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; +			finst = &parse.FullToken.FullInstruction; +			if (!nv20_vertprog_parse_instruction(vpc, 
finst)) +				goto out_err; +		} +			break; +		default: +			break; +		} +	} + +	vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; +	vp->translated = TRUE; +out_err: +	tgsi_parse_free(&parse); +	FREE(vpc); +} + +static boolean +nv20_vertprog_validate(struct nv20_context *nv20) +{  +	struct nouveau_winsys *nvws = nv20->nvws; +	struct pipe_winsys *ws = nv20->pipe.winsys; +	struct nouveau_grobj *rankine = nv20->screen->rankine; +	struct nv20_vertex_program *vp; +	struct pipe_buffer *constbuf; +	boolean upload_code = FALSE, upload_data = FALSE; +	int i; + +	vp = nv20->vertprog; +	constbuf = nv20->constbuf[PIPE_SHADER_VERTEX]; + +	/* Translate TGSI shader into hw bytecode */ +	if (!vp->translated) { +		nv20_vertprog_translate(nv20, vp); +		if (!vp->translated) +			return FALSE; +	} + +	/* Allocate hw vtxprog exec slots */ +	if (!vp->exec) { +		struct nouveau_resource *heap = nv20->screen->vp_exec_heap; +		struct nouveau_stateobj *so; +		uint vplen = vp->nr_insns; + +		if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { +			while (heap->next && heap->size < vplen) { +				struct nv20_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->exec); +			} + +			if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) +				assert(0); +		} + +		so = so_new(2, 0); +		so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); +		so_data  (so, vp->exec->start); +		so_ref(so, &vp->so); + +		upload_code = TRUE; +	} + +	/* Allocate hw vtxprog const slots */ +	if (vp->nr_consts && !vp->data) { +		struct nouveau_resource *heap = nv20->screen->vp_data_heap; + +		if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { +			while (heap->next && heap->size < vp->nr_consts) { +				struct nv20_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->data); +			} + +			if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) +				assert(0); +		} + +		/*XXX: handle this some day */ +		assert(vp->data->start >= vp->data_start_min); + +		upload_data = TRUE; +		if (vp->data_start != vp->data->start) +			upload_code = TRUE; +	} + +	/* If exec or data segments moved we need to patch the program to +	 * fixup offsets and register IDs. 
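+	 * Branch targets are not relocated yet (see the has_branch_offset
+	 * assert below).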
+	 */ +	if (vp->exec_start != vp->exec->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv20_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->has_branch_offset) { +				assert(0); +			} +		} + +		vp->exec_start = vp->exec->start; +	} + +	if (vp->nr_consts && vp->data_start != vp->data->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv20_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->const_index >= 0) { +				vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; +				vpi->data[1] |= +					(vpi->const_index + vp->data->start) << +					NV30_VP_INST_CONST_SRC_SHIFT; + +			} +		} + +		vp->data_start = vp->data->start; +	} + +	/* Update + Upload constant values */ +	if (vp->nr_consts) { +		float *map = NULL; + +		if (constbuf) { +			map = ws->buffer_map(ws, constbuf, +					     PIPE_BUFFER_USAGE_CPU_READ); +		} + +		for (i = 0; i < vp->nr_consts; i++) { +			struct nv20_vertex_program_data *vpd = &vp->consts[i]; + +			if (vpd->index >= 0) { +				if (!upload_data && +				    !memcmp(vpd->value, &map[vpd->index * 4], +					    4 * sizeof(float))) +					continue; +				memcpy(vpd->value, &map[vpd->index * 4], +				       4 * sizeof(float)); +			} + +			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); +			OUT_RING  (i + vp->data->start); +			OUT_RINGp ((uint32_t *)vpd->value, 4); +		} + +		if (constbuf) { +			ws->buffer_unmap(ws, constbuf); +		} +	} + +	/* Upload vtxprog */ +	if (upload_code) { +#if 0 +		for (i = 0; i < vp->nr_insns; i++) { +			NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", +				i, vp->insns[i].data[0], vp->insns[i].data[1], +				vp->insns[i].data[2], vp->insns[i].data[3]); +		} +#endif +		BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); +		OUT_RING  (vp->exec->start); +		for (i = 0; i < vp->nr_insns; i++) { +			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); +			OUT_RINGp (vp->insns[i].data, 4); +		} +	} + +	if (vp->so != nv20->state.hw[NV30_STATE_VERTPROG]) { +		so_ref(vp->so, &nv20->state.hw[NV30_STATE_VERTPROG]); +		return TRUE; +	} + +	return FALSE; +} + +void +nv20_vertprog_destroy(struct nv20_context *nv20, struct nv20_vertex_program *vp) +{ +	struct nouveau_winsys *nvws = nv20->screen->nvws; + +	vp->translated = FALSE; + +	if (vp->nr_insns) { +		FREE(vp->insns); +		vp->insns = NULL; +		vp->nr_insns = 0; +	} + +	if (vp->nr_consts) { +		FREE(vp->consts); +		vp->consts = NULL; +		vp->nr_consts = 0; +	} + +	nvws->res_free(&vp->exec); +	vp->exec_start = 0; +	nvws->res_free(&vp->data); +	vp->data_start = 0; +	vp->data_start_min = 0; + +	vp->ir = vp->or = 0; +	so_ref(NULL, &vp->so); +} + +struct nv20_state_entry nv20_state_vertprog = { +	.validate = nv20_vertprog_validate, +	.dirty = { +		.pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, +		.hw = NV30_STATE_VERTPROG, +	} +}; diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile new file mode 100644 index 0000000000..69f2790dfe --- /dev/null +++ b/src/gallium/drivers/nv30/Makefile @@ -0,0 +1,37 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv30 + +DRIVER_SOURCES = \ +	nv30_clear.c \ +	nv30_context.c \ +	nv30_draw.c \ +	nv30_fragprog.c \ +	nv30_fragtex.c \ +	nv30_miptree.c \ +	nv30_query.c \ +	nv30_screen.c \ +	nv30_state.c \ +	nv30_state_blend.c \ +	nv30_state_emit.c \ +	nv30_state_fb.c \ +	nv30_state_rasterizer.c \ +	nv30_state_scissor.c \ +	nv30_state_stipple.c \ +	nv30_state_viewport.c \ +	nv30_state_zsa.c \ +	nv30_surface.c \ +	nv30_vbo.c \ +	nv30_vertprog.c + +C_SOURCES = \ +	$(COMMON_SOURCES) \ +	$(DRIVER_SOURCES) + +ASM_SOURCES =  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c new file mode 100644 index 0000000000..8c3ca204d5 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv30_context.h" + +void +nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +	pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +	ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c new file mode 100644 index 0000000000..61654f8756 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +static void +nv30_flush(struct pipe_context *pipe, unsigned flags, +	   struct pipe_fence_handle **fence) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	 +	if (flags & PIPE_FLUSH_TEXTURE_CACHE) { +		BEGIN_RING(rankine, 0x1fd8, 1); +		OUT_RING  (2); +		BEGIN_RING(rankine, 0x1fd8, 1); +		OUT_RING  (1); +	} + +	FIRE_RING(fence); +} + +static void +nv30_destroy(struct pipe_context *pipe) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	if (nv30->draw) +		draw_destroy(nv30->draw); +	FREE(nv30); +} + +struct pipe_context * +nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ +	struct nv30_screen *screen = nv30_screen(pscreen); +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv30_context *nv30; +	struct nouveau_winsys *nvws = screen->nvws; + +	nv30 = CALLOC(1, sizeof(struct nv30_context)); +	if (!nv30) +		return NULL; +	nv30->screen = screen; +	nv30->pctx_id = pctx_id; + +	nv30->nvws = nvws; + +	nv30->pipe.winsys = ws; +	nv30->pipe.screen = pscreen; +	nv30->pipe.destroy = nv30_destroy; +	nv30->pipe.draw_arrays = nv30_draw_arrays; +	nv30->pipe.draw_elements = nv30_draw_elements; +	nv30->pipe.clear = nv30_clear; +	nv30->pipe.flush = nv30_flush; + +	nv30_init_query_functions(nv30); +	nv30_init_surface_functions(nv30); +	nv30_init_state_functions(nv30); + +	/* Create, configure, and install fallback swtnl path */ +	nv30->draw = draw_create(); +	draw_wide_point_threshold(nv30->draw, 9999999.0); +	draw_wide_line_threshold(nv30->draw, 9999999.0); +	draw_enable_line_stipple(nv30->draw, FALSE); +	draw_enable_point_sprites(nv30->draw, FALSE); +	draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); + +	return &nv30->pipe; +} +	 diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h new file mode 100644 index 0000000000..b933769700 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -0,0 +1,212 @@ +#ifndef __NV30_CONTEXT_H__ +#define __NV30_CONTEXT_H__ + +#include "pipe/p_context.h" +#include 
"pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \ +	struct nv30_screen *ctx = nv30->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv30_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ +	fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ +	fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv30_state_index { +	NV30_STATE_FB = 0, +	NV30_STATE_VIEWPORT = 1, +	NV30_STATE_BLEND = 2, +	NV30_STATE_RAST = 3, +	NV30_STATE_ZSA = 4, +	NV30_STATE_BCOL = 5, +	NV30_STATE_CLIP = 6, +	NV30_STATE_SCISSOR = 7, +	NV30_STATE_STIPPLE = 8, +	NV30_STATE_FRAGPROG = 9, +	NV30_STATE_VERTPROG = 10, +	NV30_STATE_FRAGTEX0 = 11, +	NV30_STATE_FRAGTEX1 = 12, +	NV30_STATE_FRAGTEX2 = 13, +	NV30_STATE_FRAGTEX3 = 14, +	NV30_STATE_FRAGTEX4 = 15, +	NV30_STATE_FRAGTEX5 = 16, +	NV30_STATE_FRAGTEX6 = 17, +	NV30_STATE_FRAGTEX7 = 18, +	NV30_STATE_FRAGTEX8 = 19, +	NV30_STATE_FRAGTEX9 = 20, +	NV30_STATE_FRAGTEX10 = 21, +	NV30_STATE_FRAGTEX11 = 22, +	NV30_STATE_FRAGTEX12 = 23, +	NV30_STATE_FRAGTEX13 = 24, +	NV30_STATE_FRAGTEX14 = 25, +	NV30_STATE_FRAGTEX15 = 26, +	NV30_STATE_VERTTEX0 = 27, +	NV30_STATE_VERTTEX1 = 28, +	NV30_STATE_VERTTEX2 = 29, +	NV30_STATE_VERTTEX3 = 30, +	NV30_STATE_VTXBUF = 31, +	NV30_STATE_VTXFMT = 32, +	NV30_STATE_VTXATTR = 33, +	NV30_STATE_MAX = 34 +}; + +#include "nv30_screen.h" + +#define NV30_NEW_BLEND		(1 <<  0) +#define NV30_NEW_RAST		(1 <<  1) +#define NV30_NEW_ZSA		(1 <<  2) +#define NV30_NEW_SAMPLER	(1 <<  3) +#define NV30_NEW_FB		(1 <<  4) +#define NV30_NEW_STIPPLE	(1 <<  5) +#define NV30_NEW_SCISSOR	(1 <<  6) +#define NV30_NEW_VIEWPORT	(1 <<  7) +#define NV30_NEW_BCOL		(1 <<  8) +#define NV30_NEW_VERTPROG	(1 <<  9) +#define NV30_NEW_FRAGPROG	(1 << 10) +#define NV30_NEW_ARRAYS		(1 << 11) +#define NV30_NEW_UCP		(1 << 12) + +struct nv30_rasterizer_state { +	struct pipe_rasterizer_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv30_zsa_state { +	struct pipe_depth_stencil_alpha_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv30_blend_state { +	struct pipe_blend_state pipe; +	struct nouveau_stateobj *so; +}; + + +struct nv30_state { +	unsigned scissor_enabled; +	unsigned stipple_enabled; +	unsigned viewport_bypass; +	unsigned fp_samplers; + +	uint64_t dirty; +	struct nouveau_stateobj *hw[NV30_STATE_MAX]; +}; + +struct nv30_context { +	struct pipe_context pipe; + +	struct nouveau_winsys *nvws; +	struct nv30_screen *screen; +	unsigned pctx_id; + +	struct draw_context *draw; + +	/* HW state derived from pipe states */ +	struct nv30_state state; + +	/* Context state */ +	unsigned dirty; +	struct pipe_scissor_state scissor; +	unsigned stipple[32]; +	struct nv30_vertex_program *vertprog; +	struct nv30_fragment_program *fragprog; +	struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; +	unsigned constbuf_nr[PIPE_SHADER_TYPES]; +	struct nv30_rasterizer_state *rasterizer; +	struct nv30_zsa_state *zsa; +	struct nv30_blend_state *blend; +	struct pipe_blend_color blend_colour; +	struct pipe_viewport_state viewport; +	struct pipe_framebuffer_state framebuffer; +	struct pipe_buffer *idxbuf; +	unsigned idxbuf_format; +	struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; +	struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; +	unsigned 
nr_samplers; +	unsigned nr_textures; +	unsigned dirty_samplers; +	struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; +	unsigned vtxbuf_nr; +	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +	unsigned vtxelt_nr; +	const unsigned *edgeflags; +}; + +static INLINE struct nv30_context * +nv30_context(struct pipe_context *pipe) +{ +	return (struct nv30_context *)pipe; +} + +struct nv30_state_entry { +	boolean (*validate)(struct nv30_context *nv30); +	struct { +		unsigned pipe; +		unsigned hw; +	} dirty; +}; + +extern void nv30_init_state_functions(struct nv30_context *nv30); +extern void nv30_init_surface_functions(struct nv30_context *nv30); +extern void nv30_init_query_functions(struct nv30_context *nv30); + +extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv30_draw.c */ +extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); + +/* nv30_vertprog.c */ +extern void nv30_vertprog_destroy(struct nv30_context *, +				  struct nv30_vertex_program *); + +/* nv30_fragprog.c */ +extern void nv30_fragprog_destroy(struct nv30_context *, +				  struct nv30_fragment_program *); + +/* nv30_fragtex.c */ +extern void nv30_fragtex_bind(struct nv30_context *); + +/* nv30_state.c and friends */ +extern boolean nv30_state_validate(struct nv30_context *nv30); +extern void nv30_state_emit(struct nv30_context *nv30); +extern struct nv30_state_entry nv30_state_rasterizer; +extern struct nv30_state_entry nv30_state_scissor; +extern struct nv30_state_entry nv30_state_stipple; +extern struct nv30_state_entry nv30_state_fragprog; +extern struct nv30_state_entry nv30_state_vertprog; +extern struct nv30_state_entry nv30_state_blend; +extern struct nv30_state_entry nv30_state_blend_colour; +extern struct nv30_state_entry nv30_state_zsa; +extern struct nv30_state_entry nv30_state_viewport; +extern struct nv30_state_entry nv30_state_framebuffer; +extern struct nv30_state_entry nv30_state_fragtex; +extern struct nv30_state_entry nv30_state_vbo; + +/* nv30_vbo.c */ +extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, +				unsigned start, unsigned count); +extern boolean nv30_draw_elements(struct pipe_context *pipe, +				  struct pipe_buffer *indexBuffer, +				  unsigned indexSize, +				  unsigned mode, unsigned start, +				  unsigned count); + +/* nv30_clear.c */ +extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		       unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c new file mode 100644 index 0000000000..74fc138c05 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_draw.c @@ -0,0 +1,61 @@ +#include "draw/draw_pipe.h" + +#include "nv30_context.h" + +struct nv30_draw_stage { +	struct draw_stage draw; +	struct nv30_context *nv30; +}; + +static void +nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_flush(struct draw_stage *draw, unsigned flags) +{ +} + +static void +nv30_draw_reset_stipple_counter(struct draw_stage *draw) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_destroy(struct draw_stage *draw) +{ +	FREE(draw); +} + +struct draw_stage * +nv30_draw_render_stage(struct nv30_context *nv30) +{ +	struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); + 
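+	/* The point/line/tri callbacks installed here are stubs that only
+	 * print an error; hardware rendering of draw-module output is not
+	 * hooked up yet. */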
+	nv30draw->nv30 = nv30; +	nv30draw->draw.draw = nv30->draw; +	nv30draw->draw.point = nv30_draw_point; +	nv30draw->draw.line = nv30_draw_line; +	nv30draw->draw.tri = nv30_draw_tri; +	nv30draw->draw.flush = nv30_draw_flush; +	nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; +	nv30draw->draw.destroy = nv30_draw_destroy; + +	return &nv30draw->draw; +} + diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c new file mode 100644 index 0000000000..320ba3f4bf --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -0,0 +1,911 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv30_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV30_FP_OP_COND_TR +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) +#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv30_fpc { +	struct nv30_fragment_program *fp; + +	uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + +	int high_temp; +	int temp_temp_count; +	int num_regs; + +	uint depth_id; +	uint colour_id; + +	unsigned inst_offset; + +	struct { +		int pipe; +		float vals[4]; +	} consts[MAX_CONSTS]; +	int nr_consts; + +	struct nv30_sreg imm[MAX_IMM]; +	unsigned nr_imm; +}; + +static INLINE struct nv30_sreg +temp(struct nv30_fpc *fpc) +{ +	int idx; + +	idx  = fpc->temp_temp_count++; +	idx += fpc->high_temp + 1; +	return nv30_sr(NV30SR_TEMP, idx); +} + +static INLINE struct nv30_sreg +constant(struct nv30_fpc *fpc, int pipe, float vals[4]) +{ +	int idx; + +	if (fpc->nr_consts == MAX_CONSTS) +		assert(0); +	idx = fpc->nr_consts++; + +	fpc->consts[idx].pipe = pipe; +	if (pipe == -1) +		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); +	return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ +	nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ +			(d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ +	nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ +		    (d), (m), (s0), none, none) + +static void +grow_insns(struct nv30_fpc *fpc, int size) +{ +	struct nv30_fragment_program *fp = fpc->fp; + +	fp->insn_len += size; +	fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) +{ +	struct nv30_fragment_program *fp = fpc->fp; +	uint32_t *hw = &fp->insn[fpc->inst_offset]; +	uint32_t sr = 0; + +	switch (src.type) { +	case NV30SR_INPUT: +		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); +		hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); +		break; +	case NV30SR_OUTPUT: +		sr |= NV30_FP_REG_SRC_HALF; +		/* fall-through */ +	case NV30SR_TEMP: +		sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); +		sr |= (src.index << NV30_FP_REG_SRC_SHIFT); +		break; +	case NV30SR_CONST: +		grow_insns(fpc, 4); +		hw = &fp->insn[fpc->inst_offset]; +		if (fpc->consts[src.index].pipe >= 0) { +			struct nv30_fragment_program_data *fpd; + +			fp->consts = realloc(fp->consts, ++fp->nr_consts * +					     sizeof(*fpd)); +			fpd = 
&fp->consts[fp->nr_consts - 1]; +			fpd->offset = fpc->inst_offset + 4; +			fpd->index = fpc->consts[src.index].pipe; +			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); +		} else { +			memcpy(&fp->insn[fpc->inst_offset + 4], +				fpc->consts[src.index].vals, +				sizeof(uint32_t) * 4); +		} + +		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);	 +		break; +	case NV30SR_NONE: +		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); +		break; +	default: +		assert(0); +	} + +	if (src.negate) +		sr |= NV30_FP_REG_NEGATE; + +	if (src.abs) +		hw[1] |= (1 << (29 + pos)); + +	sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | +	       (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | +	       (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | +	       (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); + +	hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) +{ +	struct nv30_fragment_program *fp = fpc->fp; +	uint32_t *hw = &fp->insn[fpc->inst_offset]; + +	switch (dst.type) { +	case NV30SR_TEMP: +		if (fpc->num_regs < (dst.index + 1)) +			fpc->num_regs = dst.index + 1; +		break; +	case NV30SR_OUTPUT: +		if (dst.index == 1) { +			fp->fp_control |= 0xe; +		} else { +			hw[0] |= NV30_FP_OP_OUT_REG_HALF; +		} +		break; +	case NV30SR_NONE: +		hw[0] |= (1 << 30); +		break; +	default: +		assert(0); +	} + +	hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); +} + +static void +nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, +	      struct nv30_sreg dst, int mask, +	      struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ +	struct nv30_fragment_program *fp = fpc->fp; +	uint32_t *hw; + +	fpc->inst_offset = fp->insn_len; +	grow_insns(fpc, 4); +	hw = &fp->insn[fpc->inst_offset]; +	memset(hw, 0, sizeof(uint32_t) * 4); + +	if (op == NV30_FP_OP_OPCODE_KIL) +		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; +	hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); +	hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); +	hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); + +	if (sat) +		hw[0] |= NV30_FP_OP_OUT_SAT; + +	if (dst.cc_update) +		hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; +	hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); +	hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | +		  (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | +		  (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | +		  (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); + +	emit_dst(fpc, dst); +	emit_src(fpc, 0, s0); +	emit_src(fpc, 1, s1); +	emit_src(fpc, 2, s2); +} + +static void +nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, +	    struct nv30_sreg dst, int mask, +	    struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ +	struct nv30_fragment_program *fp = fpc->fp; + +	nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + +	fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); +	fp->samplers |= (1 << unit); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ +	struct nv30_sreg src; + +	switch (fsrc->SrcRegister.File) { +	case TGSI_FILE_INPUT: +		src = nv30_sr(NV30SR_INPUT, +			      fpc->attrib_map[fsrc->SrcRegister.Index]); +		break; +	case TGSI_FILE_CONSTANT: +		src = constant(fpc, fsrc->SrcRegister.Index, NULL); +		break; +	case TGSI_FILE_IMMEDIATE: +		assert(fsrc->SrcRegister.Index < fpc->nr_imm); +		src = fpc->imm[fsrc->SrcRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); +		if (fpc->high_temp < src.index) +			fpc->high_temp = 
src.index; +		break; +	/* This is clearly insane, but gallium hands us shaders like this. +	 * Luckily fragprog results are just temp regs.. +	 */ +	case TGSI_FILE_OUTPUT: +		if (fsrc->SrcRegister.Index == fpc->colour_id) +			return nv30_sr(NV30SR_OUTPUT, 0); +		else +			return nv30_sr(NV30SR_OUTPUT, 1); +		break; +	default: +		NOUVEAU_ERR("bad src file\n"); +		break; +	} + +	src.abs = fsrc->SrcRegisterExtMod.Absolute; +	src.negate = fsrc->SrcRegister.Negate; +	src.swz[0] = fsrc->SrcRegister.SwizzleX; +	src.swz[1] = fsrc->SrcRegister.SwizzleY; +	src.swz[2] = fsrc->SrcRegister.SwizzleZ; +	src.swz[3] = fsrc->SrcRegister.SwizzleW; +	return src; +} + +static INLINE struct nv30_sreg +tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { +	int idx; + +	switch (fdst->DstRegister.File) { +	case TGSI_FILE_OUTPUT: +		if (fdst->DstRegister.Index == fpc->colour_id) +			return nv30_sr(NV30SR_OUTPUT, 0); +		else +			return nv30_sr(NV30SR_OUTPUT, 1); +		break; +	case TGSI_FILE_TEMPORARY: +		idx = fdst->DstRegister.Index + 1; +		if (fpc->high_temp < idx) +			fpc->high_temp = idx; +		return nv30_sr(NV30SR_TEMP, idx); +	case TGSI_FILE_NULL: +		return nv30_sr(NV30SR_NONE, 0); +	default: +		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); +		return nv30_sr(NV30SR_NONE, 0); +	} +} + +static INLINE int +tgsi_mask(uint tgsi) +{ +	int mask = 0; + +	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; +	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; +	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; +	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; +	return mask; +} + +static boolean +src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, +	       struct nv30_sreg *src) +{ +	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); +	struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); +	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; +	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, +			fsrc->SrcRegisterExtSwz.NegateY, +			fsrc->SrcRegisterExtSwz.NegateZ, +			fsrc->SrcRegisterExtSwz.NegateW }; +	uint c; + +	for (c = 0; c < 4; c++) { +		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { +		case TGSI_EXTSWIZZLE_X: +		case TGSI_EXTSWIZZLE_Y: +		case TGSI_EXTSWIZZLE_Z: +		case TGSI_EXTSWIZZLE_W: +			mask |= (1 << c); +			break; +		case TGSI_EXTSWIZZLE_ZERO: +			zero_mask |= (1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		case TGSI_EXTSWIZZLE_ONE: +			one_mask |= (1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		default: +			assert(0); +		} + +		if (!tgsi.negate && neg[c]) +			neg_mask |= (1 << c); +	} + +	if (mask == MASK_ALL && !neg_mask) +		return TRUE; + +	*src = temp(fpc); + +	if (mask) +		arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + +	if (zero_mask) +		arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + +	if (one_mask) +		arith(fpc, 0, STR, *src, one_mask, *src, none, none); + +	if (neg_mask) { +		struct nv30_sreg one = temp(fpc); +		arith(fpc, 0, STR, one, neg_mask, one, none, none); +		arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); +	} + +	return FALSE; +} + +static boolean +nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, +				const struct tgsi_full_instruction *finst) +{ +	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); +	struct nv30_sreg src[3], dst, tmp; +	int mask, sat, unit = 0; +	int ai = -1, ci = -1; +	int i; + +	if (finst->Instruction.Opcode == TGSI_OPCODE_END) +		return TRUE; + +	fpc->temp_temp_count = 0; +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc 
= &finst->FullSrcRegisters[i]; +		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { +			src[i] = tgsi_src(fpc, fsrc); +		} +	} + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_TEMPORARY: +			if (!src_native_swz(fpc, fsrc, &src[i])) +				continue; +			break; +		default: +			break; +		} + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			if (ai == -1 || ai == fsrc->SrcRegister.Index) { +				ai = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(fpc, fsrc); +			} else { +				NOUVEAU_MSG("extra src attr %d\n", +					 fsrc->SrcRegister.Index); +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_IMMEDIATE: +			if (ci == -1 || ci == fsrc->SrcRegister.Index) { +				ci = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(fpc, fsrc); +			} else { +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_TEMPORARY: +			/* handled above */ +			break; +		case TGSI_FILE_SAMPLER: +			unit = fsrc->SrcRegister.Index; +			break; +		case TGSI_FILE_OUTPUT: +			break; +		default: +			NOUVEAU_ERR("bad src file\n"); +			return FALSE; +		} +	} + +	dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]); +	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); +	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + +	switch (finst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); +		break; +	case TGSI_OPCODE_ADD: +		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_CMP: +		tmp = temp(fpc); +		arith(fpc, sat, MOV, dst, mask, src[2], none, none); +		tmp.cc_update = 1; +		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); +		dst.cc_test = NV30_VP_INST_COND_LT; +		arith(fpc, sat, MOV, dst, mask, src[1], none, none); +		break; +	case TGSI_OPCODE_COS: +		arith(fpc, sat, COS, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_DP3: +		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DP4: +		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DPH: +		tmp = temp(fpc); +		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); +		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), +		      swz(src[1], W, W, W, W), none); +		break; +	case TGSI_OPCODE_DST: +		arith(fpc, sat, DST, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_EX2: +		arith(fpc, sat, EX2, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FLR: +		arith(fpc, sat, FLR, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FRC: +		arith(fpc, sat, FRC, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_KILP: +		arith(fpc, 0, KIL, none, 0, none, none, none); +		break; +	case TGSI_OPCODE_KIL: +		dst = nv30_sr(NV30SR_NONE, 0); +		dst.cc_update = 1; +		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); +		dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; +		arith(fpc, 0, KIL, dst, 0, none, none, none); +		break; +	case TGSI_OPCODE_LG2: +		arith(fpc, sat, LG2, dst, mask, src[0], none, none); +		break; +//	case TGSI_OPCODE_LIT: +	case TGSI_OPCODE_LRP: +		arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); +		
break; +	case TGSI_OPCODE_MAD: +		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); +		break; +	case TGSI_OPCODE_MAX: +		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MIN: +		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MOV: +		arith(fpc, sat, MOV, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_MUL: +		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_NOISE1: +	case TGSI_OPCODE_NOISE2: +	case TGSI_OPCODE_NOISE3: +	case TGSI_OPCODE_NOISE4: +		arith(fpc, sat, SFL, dst, mask, none, none, none); +		break; +	case TGSI_OPCODE_POW: +		arith(fpc, sat, POW, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_RCP: +		arith(fpc, sat, RCP, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_RET: +		assert(0); +		break; +	case TGSI_OPCODE_RFL: +		arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_RSQ: +		arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); +		break; +	case TGSI_OPCODE_SCS: +		if (mask & MASK_X) { +			arith(fpc, sat, COS, dst, MASK_X, +			      swz(src[0], X, X, X, X), none, none); +		} +		if (mask & MASK_Y) { +			arith(fpc, sat, SIN, dst, MASK_Y, +			      swz(src[0], X, X, X, X), none, none); +		} +		break; +	case TGSI_OPCODE_SIN: +		arith(fpc, sat, SIN, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_SGE: +		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SGT: +		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SLT: +		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SUB: +		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); +		break; +	case TGSI_OPCODE_TEX: +		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_TXB: +		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_TXP: +		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_XPD: +		tmp = temp(fpc); +		arith(fpc, 0, MUL, tmp, mask, +		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); +		arith(fpc, sat, MAD, dst, (mask & ~MASK_W), +		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), +		      neg(tmp)); +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); +		return FALSE; +	} + +	return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, +				const struct tgsi_full_declaration *fdec) +{ +	int hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = NV30_FP_OP_INPUT_SRC_POSITION; +		break; +	case TGSI_SEMANTIC_COLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV30_FP_OP_INPUT_SRC_COL0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV30_FP_OP_INPUT_SRC_COL1; +		} else { +			NOUVEAU_ERR("bad colour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_FOG: +		hw = NV30_FP_OP_INPUT_SRC_FOGC; +		break; +	case TGSI_SEMANTIC_GENERIC: +		if (fdec->Semantic.SemanticIndex <= 7) { +			hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. 
+						     SemanticIndex); +		} else { +			NOUVEAU_ERR("bad generic semantic index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad input semantic\n"); +		return FALSE; +	} + +	fpc->attrib_map[fdec->DeclarationRange.First] = hw; +	return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, +				const struct tgsi_full_declaration *fdec) +{ +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		fpc->depth_id = fdec->DeclarationRange.First; +		break; +	case TGSI_SEMANTIC_COLOR: +		fpc->colour_id = fdec->DeclarationRange.First; +		break; +	default: +		NOUVEAU_ERR("bad output semantic\n"); +		return FALSE; +	} + +	return TRUE; +} + +static boolean +nv30_fragprog_prepare(struct nv30_fpc *fpc) +{ +	struct tgsi_parse_context p; +	/*int high_temp = -1, i;*/ + +	tgsi_parse_init(&p, fpc->fp->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch(tok->Token.Type) { +		case TGSI_TOKEN_TYPE_DECLARATION: +		{ +			const struct tgsi_full_declaration *fdec; +			fdec = &p.FullToken.FullDeclaration; +			switch (fdec->Declaration.File) { +			case TGSI_FILE_INPUT: +				if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) +					goto out_err; +				break; +			case TGSI_FILE_OUTPUT: +				if (!nv30_fragprog_parse_decl_output(fpc, fdec)) +					goto out_err; +				break; +			/*case TGSI_FILE_TEMPORARY: +				if (fdec->DeclarationRange.Last > high_temp) { +					high_temp = +						fdec->DeclarationRange.Last; +				} +				break;*/ +			default: +				break; +			} +		} +			break; +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			struct tgsi_full_immediate *imm; +			float vals[4]; +			 +			imm = &p.FullToken.FullImmediate; +			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +			assert(fpc->nr_imm < MAX_IMM); + +			vals[0] = imm->u.ImmediateFloat32[0].Float; +			vals[1] = imm->u.ImmediateFloat32[1].Float; +			vals[2] = imm->u.ImmediateFloat32[2].Float; +			vals[3] = imm->u.ImmediateFloat32[3].Float; +			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); +		} +			break; +		default: +			break; +		} +	} +	tgsi_parse_free(&p); + +	/*if (++high_temp) { +		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); +		for (i = 0; i < high_temp; i++) +			fpc->r_temp[i] = temp(fpc); +		fpc->r_temps_discard = 0; +	}*/ + +	return TRUE; + +out_err: +	/*if (fpc->r_temp) +		FREE(fpc->r_temp);*/ +	tgsi_parse_free(&p); +	return FALSE; +} + +static void +nv30_fragprog_translate(struct nv30_context *nv30, +			struct nv30_fragment_program *fp) +{ +	struct tgsi_parse_context parse; +	struct nv30_fpc *fpc = NULL; + +	tgsi_dump(fp->pipe.tokens,0); + +	fpc = CALLOC(1, sizeof(struct nv30_fpc)); +	if (!fpc) +		return; +	fpc->fp = fp; +	fpc->high_temp = -1; +	fpc->num_regs = 2; + +	if (!nv30_fragprog_prepare(fpc)) { +		FREE(fpc); +		return; +	} + +	tgsi_parse_init(&parse, fp->pipe.tokens); + +	while (!tgsi_parse_end_of_tokens(&parse)) { +		tgsi_parse_token(&parse); + +		switch (parse.FullToken.Token.Type) { +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; + +			finst = &parse.FullToken.FullInstruction; +			if (!nv30_fragprog_parse_instruction(fpc, finst)) +				goto out_err; +		} +			break; +		default: +			break; +		} +	} + +	fp->fp_control |= (fpc->num_regs-1)/2; +	fp->fp_reg_control = (1<<16)|0x4; + +	/* Terminate final instruction */ +	fp->insn[fpc->inst_offset] |= 0x00000001; + +	/* Append NOP + END instruction, may or may not be necessary. 
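+	 * (The 0x00000001 ORed in above and written below appears to be the
+	 *  NV30_FP_OP_PROGRAM_END bit from nv30_shader.h; in the appended
+	 *  instruction the remaining three words are left zero, which gives a
+	 *  NOP opcode with the end bit set.)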
*/ +	fpc->inst_offset = fp->insn_len; +	grow_insns(fpc, 4); +	fp->insn[fpc->inst_offset + 0] = 0x00000001; +	fp->insn[fpc->inst_offset + 1] = 0x00000000; +	fp->insn[fpc->inst_offset + 2] = 0x00000000; +	fp->insn[fpc->inst_offset + 3] = 0x00000000; +	 +	fp->translated = TRUE; +	fp->on_hw = FALSE; +out_err: +	tgsi_parse_free(&parse); +	FREE(fpc); +} + +static void +nv30_fragprog_upload(struct nv30_context *nv30, +		     struct nv30_fragment_program *fp) +{ +	struct pipe_winsys *ws = nv30->pipe.winsys; +	const uint32_t le = 1; +	uint32_t *map; +	int i; + +	map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 +	for (i = 0; i < fp->insn_len; i++) { +		fflush(stdout); fflush(stderr); +		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); +		fflush(stdout); fflush(stderr); +	} +#endif + +	if ((*(const uint8_t *)&le)) { +		for (i = 0; i < fp->insn_len; i++) { +			map[i] = fp->insn[i]; +		} +	} else { +		/* Weird swapping for big-endian chips */ +		for (i = 0; i < fp->insn_len; i++) { +			map[i] = ((fp->insn[i] & 0xffff) << 16) | +				  ((fp->insn[i] >> 16) & 0xffff); +		} +	} + +	ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv30_fragprog_validate(struct nv30_context *nv30) +{ +	struct nv30_fragment_program *fp = nv30->fragprog; +	struct pipe_buffer *constbuf = +		nv30->constbuf[PIPE_SHADER_FRAGMENT]; +	struct pipe_winsys *ws = nv30->pipe.winsys; +	struct nouveau_stateobj *so; +	boolean new_consts = FALSE; +	int i; + +	if (fp->translated) +		goto update_constants; + +	/*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ +	nv30_fragprog_translate(nv30, fp); +	if (!fp->translated) { +		/*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ +		return FALSE; +	} + +	fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); +	nv30_fragprog_upload(nv30, fp); + +	so = so_new(8, 1); +	so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); +	so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, +		  NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1); +	so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); +	so_data  (so, fp->fp_control); +	so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); +	so_data  (so, fp->fp_reg_control); +	so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); +	so_data  (so, fp->samplers); +	so_ref(so, &fp->so); + +update_constants: +	if (fp->nr_consts) { +		float *map; +		 +		map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); +		for (i = 0; i < fp->nr_consts; i++) { +			struct nv30_fragment_program_data *fpd = &fp->consts[i]; +			uint32_t *p = &fp->insn[fpd->offset]; +			uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + +			if (!memcmp(p, cb, 4 * sizeof(float))) +				continue; +			memcpy(p, cb, 4 * sizeof(float)); +			new_consts = TRUE; +		} +		ws->buffer_unmap(ws, constbuf); + +		if (new_consts) +			nv30_fragprog_upload(nv30, fp); +	} + +	if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { +		so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); +		return TRUE; +	} + +	return FALSE; +} + +void +nv30_fragprog_destroy(struct nv30_context *nv30, +		      struct nv30_fragment_program *fp) +{ +	if (fp->insn_len) +		FREE(fp->insn); +} + +struct nv30_state_entry nv30_state_fragprog = { +	.validate = nv30_fragprog_validate, +	.dirty = { +		.pipe = NV30_NEW_FRAGPROG, +		.hw = NV30_STATE_FRAGPROG +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c new file mode 100644 index 
0000000000..b1d2663af3 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -0,0 +1,163 @@ +#include "nv30_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                        \ +{                                                                              \ +  TRUE,                                                                        \ +  PIPE_FORMAT_##m,                                                             \ +  NV34TCL_TX_FORMAT_FORMAT_##tf,                                               \ +  (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y |           \ +   NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w |           \ +   NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y |           \ +   NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w)            \ +} + +struct nv30_texture_format { +	boolean defined; +	uint	pipe; +	int     format; +	int     swizzle; +}; + +static struct nv30_texture_format +nv30_texture_formats[] = { +	_(A8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W), +	_(A1R5G5B5_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W), +	_(A4R4G4B4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W), +	_(R5G6B5_UNORM  , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W), +	_(L8_UNORM      , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X), +	_(A8_UNORM      , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X), +	_(I8_UNORM      , L8      ,   S1,   S1,   S1,   S1, X, X, X, X), +	_(A8L8_UNORM    , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y), +//	_(Z16_UNORM     , Z16     ,   S1,   S1,   S1,  ONE, X, X, X, X), +//	_(Z24S8_UNORM   , Z24     ,   S1,   S1,   S1,  ONE, X, X, X, X), +	_(DXT1_RGB      , DXT1    ,   S1,   S1,   S1,  ONE, X, Y, Z, W), +	_(DXT1_RGBA     , DXT1    ,   S1,   S1,   S1,   S1, X, Y, Z, W), +	_(DXT3_RGBA     , DXT3    ,   S1,   S1,   S1,   S1, X, Y, Z, W), +	_(DXT5_RGBA     , DXT5    ,   S1,   S1,   S1,   S1, X, Y, Z, W), +	{}, +}; + +static struct nv30_texture_format * +nv30_fragtex_format(uint pipe_format) +{ +	struct nv30_texture_format *tf = nv30_texture_formats; +	char fs[128]; + +	while (tf->defined) { +		if (tf->pipe == pipe_format) +			return tf; +		tf++; +	} + +	NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); +	return NULL; +} + + +static struct nouveau_stateobj * +nv30_fragtex_build(struct nv30_context *nv30, int unit) +{ +	struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; +	struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; +	struct pipe_texture *pt = &nv30mt->base; +	struct nv30_texture_format *tf; +	struct nouveau_stateobj *so; +	uint32_t txf, txs , txp; +	unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + +	tf = nv30_fragtex_format(pt->format); +	if (!tf) +		assert(0); + +	txf  = tf->format; +	txf |= ((pt->last_level>0) ? 
NV34TCL_TX_FORMAT_MIPMAP : 0); +	txf |= log2i(pt->width[0]) << 20; +	txf |= log2i(pt->height[0]) << 24; +	txf |= log2i(pt->depth[0]) << 28; +	txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; + +	switch (pt->target) { +	case PIPE_TEXTURE_CUBE: +		txf |= NV34TCL_TX_FORMAT_CUBIC; +		/* fall-through */ +	case PIPE_TEXTURE_2D: +		txf |= NV34TCL_TX_FORMAT_DIMS_2D; +		break; +	case PIPE_TEXTURE_3D: +		txf |= NV34TCL_TX_FORMAT_DIMS_3D; +		break; +	case PIPE_TEXTURE_1D: +		txf |= NV34TCL_TX_FORMAT_DIMS_1D; +		break; +	default: +		NOUVEAU_ERR("Unknown target %d\n", pt->target); +		return NULL; +	} + +	if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		txp = 0; +	} else { +		txp  = nv30mt->level[0].pitch; +		txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */; +	} + +	txs = tf->swizzle; + +	so = so_new(16, 2); +	so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); +	so_reloc (so, nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); +	so_reloc (so, nv30mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, +		  NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); +	so_data  (so, ps->wrap); +	so_data  (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); +	so_data  (so, txs); +	so_data  (so, ps->filt | 0x2000 /*voodoo*/); +	so_data  (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | +		       pt->height[0]); +	so_data  (so, ps->bcol); + +	return so; +} + +static boolean +nv30_fragtex_validate(struct nv30_context *nv30) +{ +	struct nv30_fragment_program *fp = nv30->fragprog; +	struct nv30_state *state = &nv30->state; +	struct nouveau_stateobj *so; +	unsigned samplers, unit; + +	samplers = state->fp_samplers & ~fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		so = so_new(2, 0); +		so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); +		so_data  (so, 0); +		so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); +		state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); +	} + +	samplers = nv30->dirty_samplers & fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		so = nv30_fragtex_build(nv30, unit); +		so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); +		state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); +	} + +	nv30->state.fp_samplers = fp->samplers; +	return FALSE; +} + +struct nv30_state_entry nv30_state_fragtex = { +	.validate = nv30_fragtex_validate, +	.dirty = { +		.pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG, +		.hw = 0 +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c new file mode 100644 index 0000000000..fe13f50ebb --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -0,0 +1,234 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv30_context.h" + +static void +nv30_miptree_layout(struct nv30_miptree *nv30mt) +{ +	struct pipe_texture *pt = &nv30mt->base; +	uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; +	uint offset = 0; +	int nr_faces, l, f; +	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | +		                           PIPE_TEXTURE_USAGE_DEPTH_STENCIL | +		                           PIPE_TEXTURE_USAGE_RENDER_TARGET | +		                           PIPE_TEXTURE_USAGE_DISPLAY_TARGET | +		                           PIPE_TEXTURE_USAGE_PRIMARY); + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		nr_faces = 6; +	} else +	if (pt->target == PIPE_TEXTURE_3D) { +		nr_faces = pt->depth[0]; +	} else { +		nr_faces = 1; +	} + +	for (l = 0; l <= 
pt->last_level; l++) { +		pt->width[l] = width; +		pt->height[l] = height; +		pt->depth[l] = depth; +		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); +		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + +		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) +			nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); +		else +			nv30mt->level[l].pitch = pt->width[l] * pt->block.size; + +		nv30mt->level[l].image_offset = +			CALLOC(nr_faces, sizeof(unsigned)); + +		width  = MAX2(1, width  >> 1); +		height = MAX2(1, height >> 1); +		depth  = MAX2(1, depth  >> 1); +	} + +	for (f = 0; f < nr_faces; f++) { +		for (l = 0; l < pt->last_level; l++) { +			nv30mt->level[l].image_offset[f] = offset; + +			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) +				offset += align(nv30mt->level[l].pitch * pt->height[l], 64); +			else +				offset += nv30mt->level[l].pitch * pt->height[l]; +		} + +		nv30mt->level[l].image_offset[f] = offset; +		offset += nv30mt->level[l].pitch * pt->height[l]; +	} + +	nv30mt->total_size = offset; +} + +static struct pipe_texture * +nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv30_miptree *mt; + +	mt = MALLOC(sizeof(struct nv30_miptree)); +	if (!mt) +		return NULL; +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->shadow_tex = NULL; +	mt->shadow_surface = NULL; + +	/* Swizzled textures must be POT */ +	if (pt->width[0] & (pt->width[0] - 1) || +	    pt->height[0] & (pt->height[0] - 1)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | ++	                     PIPE_TEXTURE_USAGE_DISPLAY_TARGET | ++	                     PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else { +		switch (pt->format) { +		/* TODO: Figure out which formats can be swizzled */ +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_X8R8G8B8_UNORM: +		case PIPE_FORMAT_R16_SNORM: +		{ +			if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) +				mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + 			break; +		} +		default: +			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +		} +	} + +	nv30_miptree_layout(mt); + +	mt->buffer = ws->buffer_create(ws, 256, +				       PIPE_BUFFER_USAGE_PIXEL | +				       NOUVEAU_BUFFER_USAGE_TEXTURE, +				       mt->total_size); +	if (!mt->buffer) { +		FREE(mt); +		return NULL; +	} + +	return &mt->base; +} + +static struct pipe_texture * +nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +		     const unsigned *stride, struct pipe_buffer *pb) +{ +	struct nv30_miptree *mt; + +	/* Only supports 2D, non-mipmapped textures for the moment */ +	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || +	    pt->depth[0] != 1) +		return NULL; + +	mt = CALLOC_STRUCT(nv30_miptree); +	if (!mt) +		return NULL; + +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->level[0].pitch = stride[0]; +	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + +	pipe_buffer_reference(pscreen, &mt->buffer, pb); +	return &mt->base; +} + +static void +nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ +	struct pipe_texture *pt = *ppt; +	struct nv30_miptree *mt = (struct nv30_miptree *)pt; +	int l; + +	*ppt = NULL; +	if 
(--pt->refcount) +		return; + +	pipe_buffer_reference(pscreen, &mt->buffer, NULL); +	for (l = 0; l <= pt->last_level; l++) { +		if (mt->level[l].image_offset) +			FREE(mt->level[l].image_offset); +	} + +	if (mt->shadow_tex) { +		if (mt->shadow_surface) +			pscreen->tex_surface_release(pscreen, &mt->shadow_surface); +		nv30_miptree_release(pscreen, &mt->shadow_tex); +	} + +	FREE(mt); +} + +static struct pipe_surface * +nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +			 unsigned face, unsigned level, unsigned zslice, +			 unsigned flags) +{ +	struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; +	struct pipe_surface *ps; + +	ps = CALLOC_STRUCT(pipe_surface); +	if (!ps) +		return NULL; +	pipe_texture_reference(&ps->texture, pt); +	ps->format = pt->format; +	ps->width = pt->width[level]; +	ps->height = pt->height[level]; +	ps->block = pt->block; +	ps->nblocksx = pt->nblocksx[level]; +	ps->nblocksy = pt->nblocksy[level]; +	ps->stride = nv30mt->level[level].pitch; +	ps->usage = flags; +	ps->status = PIPE_SURFACE_STATUS_DEFINED; +	ps->refcount = 1; +	ps->face = face; +	ps->level = level; +	ps->zslice = zslice; + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		ps->offset = nv30mt->level[level].image_offset[face]; +	} else +	if (pt->target == PIPE_TEXTURE_3D) { +		ps->offset = nv30mt->level[level].image_offset[zslice]; +	} else { +		ps->offset = nv30mt->level[level].image_offset[0]; +	} + +	return ps; +} + +static void +nv30_miptree_surface_del(struct pipe_screen *pscreen, +			 struct pipe_surface **psurface) +{ +	struct pipe_surface *ps = *psurface; + +	*psurface = NULL; +	if (--ps->refcount > 0) +		return; + +	pipe_texture_reference(&ps->texture, NULL); +	FREE(ps); +} + +void +nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ +	pscreen->texture_create = nv30_miptree_create; +	pscreen->texture_blanket = nv30_miptree_blanket; +	pscreen->texture_release = nv30_miptree_release; +	pscreen->get_tex_surface = nv30_miptree_surface_new; +	pscreen->tex_surface_release = nv30_miptree_surface_del; +} diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c new file mode 100644 index 0000000000..2f974cf5c4 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv30_context.h" + +struct nv30_query { +	struct nouveau_resource *object; +	unsigned type; +	boolean ready; +	uint64_t result; +}; + +static INLINE struct nv30_query * +nv30_query(struct pipe_query *pipe) +{ +	return (struct nv30_query *)pipe; +} + +static struct pipe_query * +nv30_query_create(struct pipe_context *pipe, unsigned query_type) +{ +	struct nv30_query *q; + +	q = CALLOC(1, sizeof(struct nv30_query)); +	q->type = query_type; + +	return (struct pipe_query *)q; +} + +static void +nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_query *q = nv30_query(pq); + +	if (q->object) +		nv30->nvws->res_free(&q->object); +	FREE(q); +} + +static void +nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_query *q = nv30_query(pq); + +	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + +	/* Happens when end_query() is called, then another begin_query() +	 * without querying the result in-between.  For now we'll wait for +	 * the existing query to notify completion, but it could be better. 
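+	 * For reference, the state-tracker usage this guards against looks
+	 * roughly like the following (illustrative only, not code from this
+	 * driver):
+	 *
+	 *    pipe->begin_query(pipe, q);
+	 *    ... draw ...
+	 *    pipe->end_query(pipe, q);
+	 *    pipe->begin_query(pipe, q);   <- result of the first pass was never
+	 *                                     fetched, so we block on its
+	 *                                     notifier here before reusing q.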
+	 */ +	if (q->object) { +		uint64_t tmp; +		pipe->get_query_result(pipe, pq, 1, &tmp); +	} + +	if (nv30->nvws->res_alloc(nv30->screen->query_heap, 1, NULL, &q->object)) +		assert(0); +	nv30->nvws->notifier_reset(nv30->screen->query, q->object->start); + +	BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); +	OUT_RING  (1); +	BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); +	OUT_RING  (1); + +	q->ready = FALSE; +} + +static void +nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_query *q = nv30_query(pq); + +	BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); +	OUT_RING  ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | +		   ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); +	FIRE_RING(NULL); +} + +static boolean +nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, +		  boolean wait, uint64_t *result) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_query *q = nv30_query(pq); +	struct nouveau_winsys *nvws = nv30->nvws; + +	assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + +	if (!q->ready) { +		unsigned status; + +		status = nvws->notifier_status(nv30->screen->query, +					       q->object->start); +		if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { +			if (wait == FALSE) +				return FALSE; +			nvws->notifier_wait(nv30->screen->query, q->object->start, +					    NV_NOTIFY_STATE_STATUS_COMPLETED, +					    0); +		} + +		q->result = nvws->notifier_retval(nv30->screen->query, +						  q->object->start); +		q->ready = TRUE; +		nvws->res_free(&q->object); +	} + +	*result = q->result; +	return TRUE; +} + +void +nv30_init_query_functions(struct nv30_context *nv30) +{ +	nv30->pipe.create_query = nv30_query_create; +	nv30->pipe.destroy_query = nv30_query_destroy; +	nv30->pipe.begin_query = nv30_query_begin; +	nv30->pipe.end_query = nv30_query_end; +	nv30->pipe.get_query_result = nv30_query_result; +} diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c new file mode 100644 index 0000000000..c97a73f0b1 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -0,0 +1,401 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +#define NV30TCL_CHIPSET_3X_MASK 0x00000003 +#define NV34TCL_CHIPSET_3X_MASK 0x00000010 +#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 + +static const char * +nv30_screen_get_name(struct pipe_screen *pscreen) +{ +	struct nv30_screen *screen = nv30_screen(pscreen); +	struct nouveau_device *dev = screen->nvws->channel->device; +	static char buffer[128]; + +	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); +	return buffer; +} + +static const char * +nv30_screen_get_vendor(struct pipe_screen *pscreen) +{ +	return "nouveau"; +} + +static int +nv30_screen_get_param(struct pipe_screen *pscreen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +		return 16; +	case PIPE_CAP_NPOT_TEXTURES: +		return 0; +	case PIPE_CAP_TWO_SIDED_STENCIL: +		return 1; +	case PIPE_CAP_GLSL: +		return 0; +	case PIPE_CAP_S3TC: +		return 0; +	case PIPE_CAP_ANISOTROPIC_FILTER: +		return 1; +	case PIPE_CAP_POINT_SPRITE: +		return 1; +	case PIPE_CAP_MAX_RENDER_TARGETS: +		return 2; +	case PIPE_CAP_OCCLUSION_QUERY: +		return 1; +	case PIPE_CAP_TEXTURE_SHADOW_MAP: +		return 1; +	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +		return 13; +	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +		return 10; +	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +		return 13; +	case 
PIPE_CAP_TEXTURE_MIRROR_CLAMP: +		return 0; +	case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +		return 1; +	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +		return 0; +	case NOUVEAU_CAP_HW_VTXBUF: +	case NOUVEAU_CAP_HW_IDXBUF: +		return 1; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0; +	} +} + +static float +nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_LINE_WIDTH: +	case PIPE_CAP_MAX_LINE_WIDTH_AA: +		return 10.0; +	case PIPE_CAP_MAX_POINT_WIDTH: +	case PIPE_CAP_MAX_POINT_WIDTH_AA: +		return 64.0; +	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +		return 8.0; +	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +		return 4.0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0.0; +	} +} + +static boolean +nv30_screen_surface_format_supported(struct pipe_screen *pscreen, +				     enum pipe_format format, +				     enum pipe_texture_target target, +				     unsigned tex_usage, unsigned geom_flags) +{ +	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM: +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +			return TRUE; +		default: +			break; +		} +	} else { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_A1R5G5B5_UNORM: +		case PIPE_FORMAT_A4R4G4B4_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM: +		case PIPE_FORMAT_L8_UNORM: +		case PIPE_FORMAT_A8_UNORM: +		case PIPE_FORMAT_I8_UNORM: +		case PIPE_FORMAT_A8L8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +		case PIPE_FORMAT_Z24S8_UNORM: +			return TRUE; +		default: +			break; +		} +	} + +	return FALSE; +} + +static struct pipe_buffer * +nv30_surface_buffer(struct pipe_surface *surf) +{ +	struct nv30_miptree *mt = (struct nv30_miptree *)surf->texture; + +	return mt->buffer; +} + +static void * +nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, +		 unsigned flags ) +{ +	struct pipe_winsys	*ws = screen->winsys; +	struct pipe_surface	*surface_to_map; +	void			*map; + +	if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; + +		if (!mt->shadow_tex) { +			unsigned old_tex_usage = surface->texture->tex_usage; +			surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | +			                              PIPE_TEXTURE_USAGE_DYNAMIC; +			mt->shadow_tex = screen->texture_create(screen, surface->texture); +			surface->texture->tex_usage = old_tex_usage; + +			assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR); +		} + +		mt->shadow_surface = screen->get_tex_surface +		( +			screen, mt->shadow_tex, +			surface->face, surface->level, surface->zslice, +			surface->usage +		); + +		surface_to_map = mt->shadow_surface; +	} +	else +		surface_to_map = surface; + +	assert(surface_to_map); + +	map = ws->buffer_map(ws, nv30_surface_buffer(surface_to_map), flags); +	if (!map) +		return NULL; + +	return map + surface_to_map->offset; +} + +static void +nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ +	struct pipe_winsys	*ws = screen->winsys; +	struct pipe_surface	*surface_to_unmap; + +	/* TODO: Copy from shadow just before push buffer is flushed instead. +	         There are probably some programs that map/unmap excessively +	         before rendering. 
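+	         As written, mapping a swizzled surface returns a pointer into
+	         a linear shadow texture, and this unmap path then has the
+	         eng2d object blit the shadow's contents back into the
+	         swizzled surface before the shadow surface is released.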
*/ +	if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; + +		assert(mt->shadow_tex); + +		surface_to_unmap = mt->shadow_surface; +	} +	else +		surface_to_unmap = surface; + +	assert(surface_to_unmap); + +	ws->buffer_unmap(ws, nv30_surface_buffer(surface_to_unmap)); + +	if (surface_to_unmap != surface) { +		struct nv30_screen *nvscreen = nv30_screen(screen); + +		nvscreen->eng2d->copy(nvscreen->eng2d, surface, 0, 0, +		                      surface_to_unmap, 0, 0, +		                      surface->width, surface->height); + +		screen->tex_surface_release(screen, &surface_to_unmap); +	} +} + +static void +nv30_screen_destroy(struct pipe_screen *pscreen) +{ +	struct nv30_screen *screen = nv30_screen(pscreen); +	struct nouveau_winsys *nvws = screen->nvws; + +	nvws->res_free(&screen->vp_exec_heap); +	nvws->res_free(&screen->vp_data_heap); +	nvws->res_free(&screen->query_heap); +	nvws->notifier_free(&screen->query); +	nvws->notifier_free(&screen->sync); +	nvws->grobj_free(&screen->rankine); + +	FREE(pscreen); +} + +struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ +	struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); +	struct nouveau_stateobj *so; +	unsigned rankine_class = 0; +	unsigned chipset = nvws->channel->device->chipset; +	int ret, i; + +	if (!screen) +		return NULL; +	screen->nvws = nvws; + +	/* 2D engine setup */ +	screen->eng2d = nv04_surface_2d_init(nvws); +	screen->eng2d->buf = nv30_surface_buffer; + +	/* 3D object */ +	switch (chipset & 0xf0) { +	case 0x30: +		if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) +			rankine_class = 0x0397; +		else +		if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) +			rankine_class = 0x0697; +		else +		if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) +			rankine_class = 0x0497; +		break; +	default: +		break; +	} + +	if (!rankine_class) { +		NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", chipset); +		return NULL; +	} + +	ret = nvws->grobj_alloc(nvws, rankine_class, &screen->rankine); +	if (ret) { +		NOUVEAU_ERR("Error creating 3D object: %d\n", ret); +		return FALSE; +	} + +	/* Notifier for sync purposes */ +	ret = nvws->notifier_alloc(nvws, 1, &screen->sync); +	if (ret) { +		NOUVEAU_ERR("Error creating notifier object: %d\n", ret); +		nv30_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Query objects */ +	ret = nvws->notifier_alloc(nvws, 32, &screen->query); +	if (ret) { +		NOUVEAU_ERR("Error initialising query objects: %d\n", ret); +		nv30_screen_destroy(&screen->pipe); +		return NULL; +	} + +	ret = nvws->res_init(&screen->query_heap, 0, 32); +	if (ret) { +		NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); +		nv30_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Vtxprog resources */ +	if (nvws->res_init(&screen->vp_exec_heap, 0, 256) || +	    nvws->res_init(&screen->vp_data_heap, 0, 256)) { +		nv30_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Static rankine initialisation */ +	so = so_new(128, 0); +	so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); +	so_data  (so, screen->sync->handle); +	so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); +	so_data  (so, nvws->channel->vram->handle); +	so_data  (so, nvws->channel->gart->handle); +	so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1); +	so_data  (so, nvws->channel->vram->handle); +	so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2); +	so_data  (so, nvws->channel->vram->handle); +	
so_data  (so, nvws->channel->vram->handle); +	so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2); +	so_data  (so, nvws->channel->vram->handle); +	so_data  (so, nvws->channel->gart->handle); +/*	so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2); +	so_data  (so, 0); +	so_data  (so, screen->query->handle);*/ +	so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1); +	so_data  (so, nvws->channel->vram->handle); +	so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1); +	so_data  (so, nvws->channel->vram->handle); + +	for (i=1; i<8; i++) { +		so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); +		so_data  (so, 0); +		so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); +		so_data  (so, 0); +	} + +	so_method(so, screen->rankine, 0x220, 1); +	so_data  (so, 1); + +	so_method(so, screen->rankine, 0x03b0, 1); +	so_data  (so, 0x00100000); +	so_method(so, screen->rankine, 0x1454, 1); +	so_data  (so, 0); +	so_method(so, screen->rankine, 0x1d80, 1); +	so_data  (so, 3); +	so_method(so, screen->rankine, 0x1450, 1); +	so_data  (so, 0x00030004); + +	/* NEW */ +	so_method(so, screen->rankine, 0x1e98, 1); +	so_data  (so, 0); +	so_method(so, screen->rankine, 0x17e0, 3); +	so_data  (so, fui(0.0)); +	so_data  (so, fui(0.0)); +	so_data  (so, fui(1.0)); +	so_method(so, screen->rankine, 0x1f80, 16); +	for (i=0; i<16; i++) { +		so_data  (so, (i==8) ? 0x0000ffff : 0); +	} + +	so_method(so, screen->rankine, 0x120, 3); +	so_data  (so, 0); +	so_data  (so, 1); +	so_data  (so, 2); + +	so_method(so, screen->rankine, 0x1d88, 1); +	so_data  (so, 0x00001200); + +	so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1); +	so_data  (so, 0); + +	so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); +	so_data  (so, fui(0.0)); +	so_data  (so, fui(1.0)); + +	so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); +	so_data  (so, 0xffff0000); + +	/* enables use of vp rather than fixed-function somehow */ +	so_method(so, screen->rankine, 0x1e94, 1); +	so_data  (so, 0x13); + +	so_emit(nvws, so); +	so_ref(NULL, &so); +	nvws->push_flush(nvws, 0, NULL); + +	screen->pipe.winsys = ws; +	screen->pipe.destroy = nv30_screen_destroy; + +	screen->pipe.get_name = nv30_screen_get_name; +	screen->pipe.get_vendor = nv30_screen_get_vendor; +	screen->pipe.get_param = nv30_screen_get_param; +	screen->pipe.get_paramf = nv30_screen_get_paramf; + +	screen->pipe.is_format_supported = nv30_screen_surface_format_supported; + +	screen->pipe.surface_map = nv30_surface_map; +	screen->pipe.surface_unmap = nv30_surface_unmap; + +	nv30_screen_init_miptree_functions(&screen->pipe); +	u_simple_screen_init(&screen->pipe); + +	return &screen->pipe; +} diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h new file mode 100644 index 0000000000..b11e470f94 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -0,0 +1,37 @@ +#ifndef __NV30_SCREEN_H__ +#define __NV30_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv30_screen { +	struct pipe_screen pipe; + +	struct nouveau_winsys *nvws; + +	unsigned cur_pctx; + +	/* HW graphics objects */ +	struct nv04_surface_2d *eng2d; +	struct nouveau_grobj *rankine; +	struct nouveau_notifier *sync; + +	/* Query object resources */ +	struct nouveau_notifier *query; +	struct nouveau_resource *query_heap; + +	/* Vtxprog resources */ +	struct nouveau_resource *vp_exec_heap; +	struct nouveau_resource *vp_data_heap; + +	/* Current 3D state of channel */ +	struct nouveau_stateobj *state[NV30_STATE_MAX]; 
+}; + +static INLINE struct nv30_screen * +nv30_screen(struct pipe_screen *screen) +{ +	return (struct nv30_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h new file mode 100644 index 0000000000..dd3a36f78f --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_shader.h @@ -0,0 +1,490 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + *   ABS - MOV + NV40_VP_INST0_DEST_ABS + *   POW - EX2 + MUL + LG2 + *   SUB - ADD, second source negated + *   SWZ - MOV + *   XPD -   + * + * Register access + *   - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + *   - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + *   According to the value returned for + *   MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + *   there are only two address registers available.  The destination in the + *   ARL instruction is set to TEMP <n> (The temp isn't actually written). + * + *   When using vanilla ARB_v_p, the proprietary driver will squish both the + *   available ADDRESS regs into the first hardware reg in the X and Y + *   components. + * + *   To use an address reg as an index into consts, the CONST_SRC is set to + *   (const_base + offset) and INDEX_CONST is set. + * + *   To access the second address reg use ADDR_REG_SELECT_1. A particular + *   component of the address regs is selected with ADDR_SWZ. + * + *   Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}.  It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + *   The BRA/CAL instructions seem to follow a slightly different opcode + *   layout.  The destination instruction ID (IADDR) overlaps a source field. + *   Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + *   command, and is incremented automatically on each UPLOAD_INST FIFO + *   command. + * + *   Conditional branching is achieved by using the condition tests described + *   above.  There doesn't appear to be dedicated looping instructions, but + *   this can be done using a temp reg + conditional branching. + * + *   Subroutines may be uploaded before the main program itself, but the first + *   executed instruction is determined by the PROGRAM_START_ID FIFO command. 
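+ * Source field packing
+ *
+ *   A full 15-bit source selector (type/index, swizzle and negate, as laid
+ *   out in the NV30_VP_SRC_* defines below) does not fit inside a single
+ *   instruction field, so it is split across two instruction words.  Based
+ *   purely on the shift/mask defines below (not verified against hardware),
+ *   packing source 0 presumably looks something like:
+ *
+ *     hw[1] |= ((src0 & NV30_VP_SRC0_HIGH_MASK) >> NV30_VP_SRC0_HIGH_SHIFT)
+ *              << NV30_VP_INST_SRC0H_SHIFT;
+ *     hw[2] |= (src0 & NV30_VP_SRC0_LOW_MASK) << NV30_VP_INST_SRC0L_SHIFT;
+ *
+ *   Source 1 fits into dword 2 in one piece (NV30_VP_INST_SRC1_MASK), while
+ *   source 2 is split across dwords 2 and 3 via the SRC2H/SRC2L fields.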
+ * + */ + +/* DWORD 0 */ + +#define NV30_VP_INST_ADDR_REG_SELECT_1        (1 << 24) +#define NV30_VP_INST_SRC2_ABS           (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS           (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS           (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT         (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT        16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK        (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE        (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK      (0xF << 16) +#define NV30_VP_INST_COND_TEST_ENABLE        (1<<14) +#define NV30_VP_INST_COND_SHIFT          11 +#define NV30_VP_INST_COND_MASK          (0x07 << 11) +#  define NV30_VP_INST_COND_FL  0 /* guess */   +#  define NV30_VP_INST_COND_LT  1   +#  define NV30_VP_INST_COND_EQ  2 +#  define NV30_VP_INST_COND_LE  3 +#  define NV30_VP_INST_COND_GT  4 +#  define NV30_VP_INST_COND_NE  5 +#  define NV30_VP_INST_COND_GE  6 +#  define NV30_VP_INST_COND_TR  7 /* guess */ +#define NV30_VP_INST_COND_SWZ_X_SHIFT        9 +#define NV30_VP_INST_COND_SWZ_X_MASK        (0x03 <<  9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT        7 +#define NV30_VP_INST_COND_SWZ_Y_MASK        (0x03 <<  7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT        5 +#define NV30_VP_INST_COND_SWZ_Z_MASK        (0x03 <<  5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT        3 +#define NV30_VP_INST_COND_SWZ_W_MASK        (0x03 <<  3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT        3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK        (0xFF <<  3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT        1 +#define NV30_VP_INST_ADDR_SWZ_MASK        (0x03 <<  1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT        0 +#define NV30_VP_INST_SCA_OPCODEH_MASK        (0x01 <<  0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT        28 +#define NV30_VP_INST_SCA_OPCODEL_MASK        (0x0F << 28) +#  define NV30_VP_INST_OP_NOP  0x00 +#  define NV30_VP_INST_OP_RCP  0x02 +#  define NV30_VP_INST_OP_RCC  0x03 +#  define NV30_VP_INST_OP_RSQ  0x04 +#  define NV30_VP_INST_OP_EXP  0x05 +#  define NV30_VP_INST_OP_LOG  0x06 +#  define NV30_VP_INST_OP_LIT  0x07 +#  define NV30_VP_INST_OP_BRA  0x09 +#  define NV30_VP_INST_OP_CAL  0x0B +#  define NV30_VP_INST_OP_RET  0x0C +#  define NV30_VP_INST_OP_LG2  0x0D +#  define NV30_VP_INST_OP_EX2  0x0E +#  define NV30_VP_INST_OP_SIN  0x0F +#  define NV30_VP_INST_OP_COS  0x10 +#define NV30_VP_INST_VEC_OPCODE_SHIFT        23 +#define NV30_VP_INST_VEC_OPCODE_MASK        (0x1F << 23) +#  define NV30_VP_INST_OP_NOPV  0x00 +#  define NV30_VP_INST_OP_MOV  0x01 +#  define NV30_VP_INST_OP_MUL  0x02 +#  define NV30_VP_INST_OP_ADD  0x03 +#  define NV30_VP_INST_OP_MAD  0x04 +#  define NV30_VP_INST_OP_DP3  0x05 +#  define NV30_VP_INST_OP_DP4  0x07 +#  define NV30_VP_INST_OP_DPH  0x06 +#  define NV30_VP_INST_OP_DST  0x08 +#  define NV30_VP_INST_OP_MIN  0x09 +#  define NV30_VP_INST_OP_MAX  0x0A +#  define NV30_VP_INST_OP_SLT  0x0B +#  define NV30_VP_INST_OP_SGE  0x0C +#  define NV30_VP_INST_OP_ARL  0x0D +#  define NV30_VP_INST_OP_FRC  0x0E +#  define NV30_VP_INST_OP_FLR  0x0F +#  define NV30_VP_INST_OP_SEQ  0x10 +#  define NV30_VP_INST_OP_SFL  0x11 +#  define NV30_VP_INST_OP_SGT  0x12 +#  define NV30_VP_INST_OP_SLE  0x13 +#  define NV30_VP_INST_OP_SNE  0x14 +#  define NV30_VP_INST_OP_STR  0x15 +#  define NV30_VP_INST_OP_SSG  0x16 +#  define NV30_VP_INST_OP_ARR  0x17 +#  define NV30_VP_INST_OP_ARA  0x18 +#define NV30_VP_INST_CONST_SRC_SHIFT        14 +#define NV30_VP_INST_CONST_SRC_MASK        (0xFF << 14) +#define 
NV30_VP_INST_INPUT_SRC_SHIFT        9    /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK        (0x0F <<  9)  /*NV20*/ +#  define NV30_VP_INST_IN_POS  0    /* These seem to match the bindings specified in */ +#  define NV30_VP_INST_IN_WEIGHT  1    /* the ARB_v_p spec (2.14.3.1) */ +#  define NV30_VP_INST_IN_NORMAL  2     +#  define NV30_VP_INST_IN_COL0  3    /* Should probably confirm them all though */ +#  define NV30_VP_INST_IN_COL1  4 +#  define NV30_VP_INST_IN_FOGC  5 +#  define NV30_VP_INST_IN_TC0  8 +#  define NV30_VP_INST_IN_TC(n)  (8+n) +#define NV30_VP_INST_SRC0H_SHIFT        0    /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK          (0x1FF << 0)  /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions.  See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT        26    /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK         (0x3F  <<26)  /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT         11    /*NV20*/ +#define NV30_VP_INST_SRC1_MASK          (0x7FFF<<11)  /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT        0    /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK          (0x7FF << 0)  /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT        2 +#define NV30_VP_INST_IADDR_MASK          (0xF <<  28)   /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT        28    /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK          (0x0F  <<28)  /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT      24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK      (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT      20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK      (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT      16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK      (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT      12    /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK      (0x0F << 12)  /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT        2 +#define NV30_VP_INST_DEST_MASK        (0x0F <<  2) +#  define NV30_VP_INST_DEST_POS  0 +#  define NV30_VP_INST_DEST_BFC0  1 +#  define NV30_VP_INST_DEST_BFC1  2 +#  define NV30_VP_INST_DEST_COL0  3 +#  define NV30_VP_INST_DEST_COL1  4 +#  define NV30_VP_INST_DEST_FOGC  5 +#  define NV30_VP_INST_DEST_PSZ   6 +#  define NV30_VP_INST_DEST_TC(n)  (8+n) + +#define NV30_VP_INST_LAST                           (1 << 0) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT                                                6 +#define NV30_VP_SRC0_HIGH_MASK                                        0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK                                         0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT                                                4 +#define NV30_VP_SRC2_HIGH_MASK                                        0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK                                         0x0000000F + + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE          (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT        12 +#define NV30_VP_SRC_REG_SWZ_X_MASK        (0x03  <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT        10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK        (0x03  <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT        8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK        (0x03  << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT        6 +#define NV30_VP_SRC_REG_SWZ_W_MASK        (0x03  << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT        6 
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK        (0xFF  << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT        2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK        (0x0F  << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT        0 +#define NV30_VP_SRC_REG_TYPE_MASK        (0x03  << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP  1 +#define NV30_VP_SRC_REG_TYPE_INPUT  2 +#define NV30_VP_SRC_REG_TYPE_CONST  3 /* guess */ + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + *   0 - Opcode, output reg/mask, ATTRIB source + *   1 - Source 0 + *   2 - Source 1 + *   3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + *     result.color == R0.xyzw + *     result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + *  + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords.  As such instructions such as: + *  + *     ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + *   TODO + *  + * Non-native instructions: + *   LIT + *   LRP - MAD+MAD + *   SUB - ADD, negate second source + *   RSQ - LG2 + EX2 + *   POW - LG2 + MUL + EX2 + *   SCS - COS + SIN + *   XPD + */ + +//== Opcode / Destination selection == +#define NV30_FP_OP_PROGRAM_END          (1 << 0) +#define NV30_FP_OP_OUT_REG_SHIFT        1 +#define NV30_FP_OP_OUT_REG_MASK          (31 << 1)  /* uncertain */ +/* Needs to be set when writing outputs to get expected result.. */ +#define NV30_FP_OP_OUT_REG_HALF          (1 << 7) +#define NV30_FP_OP_COND_WRITE_ENABLE        (1 << 8) +#define NV30_FP_OP_OUTMASK_SHIFT        9 +#define NV30_FP_OP_OUTMASK_MASK          (0xF << 9) +#  define NV30_FP_OP_OUT_X  (1<<9) +#  define NV30_FP_OP_OUT_Y  (1<<10) +#  define NV30_FP_OP_OUT_Z  (1<<11) +#  define NV30_FP_OP_OUT_W  (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. 
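 *
 * If the input really is selected once per instruction here (rather than in
 * the source selection dwords), that would also explain the one-attribute-
 * per-instruction limit noted earlier; the quoted
 *
 *     ADD result.color, fragment.color, fragment.texcoord[0];
 *
 * would then presumably be expanded to something like (temporary register
 * numbers are made up):
 *
 *     MOV R2, fragment.color;
 *     MOV R3, fragment.texcoord[0];
 *     ADD result.color, R2, R3;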
+ */ +#define NV30_FP_OP_INPUT_SRC_SHIFT        13 +#define NV30_FP_OP_INPUT_SRC_MASK        (15 << 13) +#  define NV30_FP_OP_INPUT_SRC_POSITION  0x0 +#  define NV30_FP_OP_INPUT_SRC_COL0  0x1 +#  define NV30_FP_OP_INPUT_SRC_COL1  0x2 +#  define NV30_FP_OP_INPUT_SRC_FOGC  0x3 +#  define NV30_FP_OP_INPUT_SRC_TC0    0x4 +#  define NV30_FP_OP_INPUT_SRC_TC(n)  (0x4 + n) +#define NV30_FP_OP_TEX_UNIT_SHIFT        17 +#define NV30_FP_OP_TEX_UNIT_MASK        (0xF << 17) /* guess */ +#define NV30_FP_OP_PRECISION_SHIFT        22 +#define NV30_FP_OP_PRECISION_MASK        (3 << 22) +#   define NV30_FP_PRECISION_FP32  0 +#   define NV30_FP_PRECISION_FP16  1 +#   define NV30_FP_PRECISION_FX12  2 +#define NV30_FP_OP_OPCODE_SHIFT          24 +#define NV30_FP_OP_OPCODE_MASK          (0x3F << 24) +#  define NV30_FP_OP_OPCODE_NOP  0x00 +#  define NV30_FP_OP_OPCODE_MOV  0x01 +#  define NV30_FP_OP_OPCODE_MUL  0x02 +#  define NV30_FP_OP_OPCODE_ADD  0x03 +#  define NV30_FP_OP_OPCODE_MAD  0x04 +#  define NV30_FP_OP_OPCODE_DP3  0x05 +#  define NV30_FP_OP_OPCODE_DP4  0x06 +#  define NV30_FP_OP_OPCODE_DST  0x07 +#  define NV30_FP_OP_OPCODE_MIN  0x08 +#  define NV30_FP_OP_OPCODE_MAX  0x09 +#  define NV30_FP_OP_OPCODE_SLT  0x0A +#  define NV30_FP_OP_OPCODE_SGE  0x0B +#  define NV30_FP_OP_OPCODE_SLE  0x0C +#  define NV30_FP_OP_OPCODE_SGT  0x0D +#  define NV30_FP_OP_OPCODE_SNE  0x0E +#  define NV30_FP_OP_OPCODE_SEQ  0x0F +#  define NV30_FP_OP_OPCODE_FRC  0x10 +#  define NV30_FP_OP_OPCODE_FLR  0x11 +#  define NV30_FP_OP_OPCODE_KIL  0x12 +#  define NV30_FP_OP_OPCODE_PK4B   0x13 +#  define NV30_FP_OP_OPCODE_UP4B   0x14 +#  define NV30_FP_OP_OPCODE_DDX  0x15 /* can only write XY */ +#  define NV30_FP_OP_OPCODE_DDY  0x16 /* can only write XY */ +#  define NV30_FP_OP_OPCODE_TEX  0x17 +#  define NV30_FP_OP_OPCODE_TXP  0x18 +#  define NV30_FP_OP_OPCODE_TXD  0x19 +#  define NV30_FP_OP_OPCODE_RCP  0x1A +#  define NV30_FP_OP_OPCODE_RSQ  0x1B +#  define NV30_FP_OP_OPCODE_EX2  0x1C +#  define NV30_FP_OP_OPCODE_LG2  0x1D +#  define NV30_FP_OP_OPCODE_LIT  0x1E +#  define NV30_FP_OP_OPCODE_LRP  0x1F +#  define NV30_FP_OP_OPCODE_STR  0x20  +#  define NV30_FP_OP_OPCODE_SFL  0x21 +#  define NV30_FP_OP_OPCODE_COS  0x22 +#  define NV30_FP_OP_OPCODE_SIN  0x23 +#  define NV30_FP_OP_OPCODE_PK2H   0x24 +#  define NV30_FP_OP_OPCODE_UP2H   0x25 +#  define NV30_FP_OP_OPCODE_POW  0x26 +#  define NV30_FP_OP_OPCODE_PK4UB  0x27 +#  define NV30_FP_OP_OPCODE_UP4UB  0x28 +#  define NV30_FP_OP_OPCODE_PK2US  0x29 +#  define NV30_FP_OP_OPCODE_UP2US  0x2A +#  define NV30_FP_OP_OPCODE_DP2A   0x2E +#  define NV30_FP_OP_OPCODE_TXB  0x31 +#  define NV30_FP_OP_OPCODE_RFL  0x36 +#  define NV30_FP_OP_OPCODE_DIV  0x3A +#define NV30_FP_OP_OUT_SAT          (1 << 31) + +/* high order bits of SRC0 */ +#define NV30_FP_OP_OUT_ABS          (1 << 29) +#define NV30_FP_OP_COND_SWZ_W_SHIFT        27 +#define NV30_FP_OP_COND_SWZ_W_MASK        (3 << 27) +#define NV30_FP_OP_COND_SWZ_Z_SHIFT        25 +#define NV30_FP_OP_COND_SWZ_Z_MASK        (3 << 25) +#define NV30_FP_OP_COND_SWZ_Y_SHIFT        23 +#define NV30_FP_OP_COND_SWZ_Y_MASK        (3 << 23) +#define NV30_FP_OP_COND_SWZ_X_SHIFT        21 +#define NV30_FP_OP_COND_SWZ_X_MASK        (3 << 21) +#define NV30_FP_OP_COND_SWZ_ALL_SHIFT        21 +#define NV30_FP_OP_COND_SWZ_ALL_MASK        (0xFF << 21) +#define NV30_FP_OP_COND_SHIFT          18 +#define NV30_FP_OP_COND_MASK          (0x07 << 18) +#  define NV30_FP_OP_COND_FL  0 +#  define NV30_FP_OP_COND_LT  1 +#  define NV30_FP_OP_COND_EQ  2 +#  define NV30_FP_OP_COND_LE  3 +#  
define NV30_FP_OP_COND_GT  4 +#  define NV30_FP_OP_COND_NE  5 +#  define NV30_FP_OP_COND_GE  6 +#  define NV30_FP_OP_COND_TR  7 + +/* high order bits of SRC1 */ +#define NV30_FP_OP_DST_SCALE_SHIFT        28 +#define NV30_FP_OP_DST_SCALE_MASK        (3 << 28) +#define NV30_FP_OP_DST_SCALE_1X                                                0 +#define NV30_FP_OP_DST_SCALE_2X                                                1 +#define NV30_FP_OP_DST_SCALE_4X                                                2 +#define NV30_FP_OP_DST_SCALE_8X                                                3 +#define NV30_FP_OP_DST_SCALE_INV_2X                                            5 +#define NV30_FP_OP_DST_SCALE_INV_4X                                            6 +#define NV30_FP_OP_DST_SCALE_INV_8X                                            7 + + +/* high order bits of SRC2 */ +#define NV30_FP_OP_INDEX_INPUT          (1 << 30) + +//== Register selection == +#define NV30_FP_REG_TYPE_SHIFT          0 +#define NV30_FP_REG_TYPE_MASK          (3 << 0) +#  define NV30_FP_REG_TYPE_TEMP  0 +#  define NV30_FP_REG_TYPE_INPUT  1 +#  define NV30_FP_REG_TYPE_CONST  2 +#define NV30_FP_REG_SRC_SHIFT          2 /* uncertain */ +#define NV30_FP_REG_SRC_MASK          (31 << 2) +#define NV30_FP_REG_SRC_HALF          (1 << 8) +#define NV30_FP_REG_SWZ_ALL_SHIFT        9 +#define NV30_FP_REG_SWZ_ALL_MASK        (255 << 9) +#define NV30_FP_REG_SWZ_X_SHIFT          9 +#define NV30_FP_REG_SWZ_X_MASK          (3 << 9) +#define NV30_FP_REG_SWZ_Y_SHIFT          11 +#define NV30_FP_REG_SWZ_Y_MASK          (3 << 11) +#define NV30_FP_REG_SWZ_Z_SHIFT          13 +#define NV30_FP_REG_SWZ_Z_MASK          (3 << 13) +#define NV30_FP_REG_SWZ_W_SHIFT          15 +#define NV30_FP_REG_SWZ_W_MASK          (3 << 15) +#  define NV30_FP_SWIZZLE_X  0 +#  define NV30_FP_SWIZZLE_Y  1 +#  define NV30_FP_SWIZZLE_Z  2 +#  define NV30_FP_SWIZZLE_W  3 +#define NV30_FP_REG_NEGATE          (1 << 17) + +#define NV30SR_NONE	0 +#define NV30SR_OUTPUT	1 +#define NV30SR_INPUT	2 +#define NV30SR_TEMP	3 +#define NV30SR_CONST	4 + +struct nv30_sreg { +	int type; +	int index; + +	int dst_scale; + +	int negate; +	int abs; +	int swz[4]; + +	int cc_update; +	int cc_update_reg; +	int cc_test; +	int cc_test_reg; +	int cc_swz[4]; +}; + +static INLINE struct nv30_sreg +nv30_sr(int type, int index) +{ +	struct nv30_sreg temp = { +		.type = type, +		.index = index, +		.dst_scale = DEF_SCALE, +		.abs = 0, +		.negate = 0, +		.swz = { 0, 1, 2, 3 }, +		.cc_update = 0, +		.cc_update_reg = 0, +		.cc_test = DEF_CTEST, +		.cc_test_reg = 0, +		.cc_swz = { 0, 1, 2, 3 }, +	}; +	return temp; +} + +static INLINE struct nv30_sreg +nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) +{ +	struct nv30_sreg dst = src; + +	dst.swz[SWZ_X] = src.swz[x]; +	dst.swz[SWZ_Y] = src.swz[y]; +	dst.swz[SWZ_Z] = src.swz[z]; +	dst.swz[SWZ_W] = src.swz[w]; +	return dst; +} + +static INLINE struct nv30_sreg +nv30_sr_neg(struct nv30_sreg src) +{ +	src.negate = !src.negate; +	return src; +} + +static INLINE struct nv30_sreg +nv30_sr_abs(struct nv30_sreg src) +{ +	src.abs = 1; +	return src; +} + +static INLINE struct nv30_sreg +nv30_sr_scale(struct nv30_sreg src, int scale) +{ +	src.dst_scale = scale; +	return src; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c new file mode 100644 index 0000000000..26147565a5 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -0,0 +1,725 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include 
"pipe/p_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +static void * +nv30_blend_state_create(struct pipe_context *pipe, +			const struct pipe_blend_state *cso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nouveau_grobj *rankine = nv30->screen->rankine; +	struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); +	struct nouveau_stateobj *so = so_new(16, 0); + +	if (cso->blend_enable) { +		so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); +		so_data  (so, 1); +		so_data  (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | +			       nvgl_blend_func(cso->rgb_src_factor)); +		so_data  (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | +			      nvgl_blend_func(cso->rgb_dst_factor)); +		/* FIXME: Gallium assumes GL_EXT_blend_func_separate. +		   It is not the case for NV30 */ +		so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1); +		so_data  (so, nvgl_blend_eqn(cso->rgb_func)); +	} else { +		so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_method(so, rankine, NV34TCL_COLOR_MASK, 1); +	so_data  (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | +		       ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | +		       ((cso->colormask & PIPE_MASK_G) ? (0x01 <<  8) : 0) | +		       ((cso->colormask & PIPE_MASK_B) ? (0x01 <<  0) : 0))); + +	if (cso->logicop_enable) { +		so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); +		so_data  (so, 1); +		so_data  (so, nvgl_logicop_func(cso->logicop_func)); +	} else { +		so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1); +	so_data  (so, cso->dither ? 1 : 0); + +	so_ref(so, &bso->so); +	bso->pipe = *cso; +	return (void *)bso; +} + +static void +nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->blend = hwcso; +	nv30->dirty |= NV30_NEW_BLEND; +} + +static void +nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_blend_state *bso = hwcso; + +	so_ref(NULL, &bso->so); +	FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { +	unsigned ret; + +	switch (wrap) { +	case PIPE_TEX_WRAP_REPEAT: +		ret = NV34TCL_TX_WRAP_S_REPEAT; +		break; +	case PIPE_TEX_WRAP_MIRROR_REPEAT: +		ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +		ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +		ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_CLAMP: +		ret = NV34TCL_TX_WRAP_S_CLAMP; +		break; +/*	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +		ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +		ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP: +		ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; +		break;*/ +	default: +		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +		ret = NV34TCL_TX_WRAP_S_REPEAT; +		break; +	} + +	return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static void * +nv30_sampler_state_create(struct pipe_context *pipe, +			  const struct pipe_sampler_state *cso) +{ +	struct nv30_sampler_state *ps; +	uint32_t filter = 0; + +	ps = MALLOC(sizeof(struct nv30_sampler_state)); + +	ps->fmt = 0; +	/* TODO: Not all RECTs formats have this bit set, bits 15-8 of format +	   are the tx format to use. 
We should store normalized coord flag +	   in sampler state structure, and set appropriate format in +	   nvxx_fragtex_build() +	 */ +	/*NV34TCL_TX_FORMAT_RECT*/ +	/*if (!cso->normalized_coords) { +		ps->fmt |= (1<<14) ; +	}*/ + +	ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | +		    (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | +		    (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); + +	ps->en = 0; + +	if (cso->max_anisotropy >= 8.0) { +		ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; +	} else +	if (cso->max_anisotropy >= 4.0) { +		ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; +	} else +	if (cso->max_anisotropy >= 2.0) { +		ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; +	} + +	switch (cso->mag_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; +		break; +	} + +	switch (cso->min_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; +			break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; +			break; +		} +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; +		break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; +			break; +		} +		break; +	} + +	ps->filt = filter; + +	{ +		float limit; + +		limit = CLAMP(cso->lod_bias, -16.0, 15.0); +		ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + +		limit = CLAMP(cso->max_lod, 0.0, 15.0); +		ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + +		limit = CLAMP(cso->min_lod, 0.0, 15.0); +		ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; +	} + +	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +		switch (cso->compare_func) { +		case PIPE_FUNC_NEVER: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; +			break; +		case PIPE_FUNC_GREATER: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; +			break; +		case PIPE_FUNC_EQUAL: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; +			break; +		case PIPE_FUNC_GEQUAL: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; +			break; +		case PIPE_FUNC_LESS: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; +			break; +		case PIPE_FUNC_NOTEQUAL: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; +			break; +		case PIPE_FUNC_LEQUAL: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; +			break; +		case PIPE_FUNC_ALWAYS: +			ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; +			break; +		default: +			break; +		} +	} + +	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | +		    (float_to_ubyte(cso->border_color[0]) << 16) | +		    (float_to_ubyte(cso->border_color[1]) <<  8) | +		    (float_to_ubyte(cso->border_color[2]) <<  0)); + +	return (void *)ps; +} + +static void +nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv30->tex_sampler[unit] = sampler[unit]; +		nv30->dirty_samplers |= (1 << unit); +	} + +	for (unit = nr; unit < nv30->nr_samplers; unit++) { +		
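		/* Unbind any samplers above the new count that were left over
		 * from a previous, larger bind, and flag those units dirty so
		 * the change is picked up on the next validation. */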
nv30->tex_sampler[unit] = NULL; +		nv30->dirty_samplers |= (1 << unit); +	} + +	nv30->nr_samplers = nr; +	nv30->dirty |= NV30_NEW_SAMPLER; +} + +static void +nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void +nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, +			 struct pipe_texture **miptree) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		pipe_texture_reference((struct pipe_texture **) +				       &nv30->tex_miptree[unit], miptree[unit]); +		nv30->dirty_samplers |= (1 << unit); +	} + +	for (unit = nr; unit < nv30->nr_textures; unit++) { +		pipe_texture_reference((struct pipe_texture **) +				       &nv30->tex_miptree[unit], NULL); +		nv30->dirty_samplers |= (1 << unit); +	} + +	nv30->nr_textures = nr; +	nv30->dirty |= NV30_NEW_SAMPLER; +} + +static void * +nv30_rasterizer_state_create(struct pipe_context *pipe, +			     const struct pipe_rasterizer_state *cso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); +	struct nouveau_stateobj *so = so_new(32, 0); +	struct nouveau_grobj *rankine = nv30->screen->rankine; + +	/*XXX: ignored: +	 * 	light_twoside +	 * 	point_smooth -nohw +	 * 	multisample +	 */ + +	so_method(so, rankine, NV34TCL_SHADE_MODEL, 1); +	so_data  (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : +				       NV34TCL_SHADE_MODEL_SMOOTH); + +	so_method(so, rankine, NV34TCL_LINE_WIDTH, 2); +	so_data  (so, (unsigned char)(cso->line_width * 8.0) & 0xff); +	so_data  (so, cso->line_smooth ? 1 : 0); +	so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); +	so_data  (so, cso->line_stipple_enable ? 1 : 0); +	so_data  (so, (cso->line_stipple_pattern << 16) | +		       cso->line_stipple_factor); + +	so_method(so, rankine, NV34TCL_POINT_SIZE, 1); +	so_data  (so, fui(cso->point_size)); + +	so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6); +	if (cso->front_winding == PIPE_WINDING_CCW) { +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV34TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV34TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV34TCL_CULL_FACE_BACK); +			break; +		} +		so_data(so, NV34TCL_FRONT_FACE_CCW); +	} else { +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV34TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV34TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV34TCL_CULL_FACE_BACK); +			break; +		} +		so_data(so, NV34TCL_FRONT_FACE_CW); +	} +	so_data(so, cso->poly_smooth ? 1 : 0); +	so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + +	so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); +	so_data  (so, cso->poly_stipple_enable ? 
1 : 0); + +	so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if (cso->offset_cw || cso->offset_ccw) { +		so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2); +		so_data  (so, fui(cso->offset_scale)); +		so_data  (so, fui(cso->offset_units * 2)); +	} + +	so_method(so, rankine, NV34TCL_POINT_SPRITE, 1); +	if (cso->point_sprite) { +		unsigned psctl = (1 << 0), i; + +		for (i = 0; i < 8; i++) { +			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) +				psctl |= (1 << (8 + i)); +		} + +		so_data(so, psctl); +	} else { +		so_data(so, 0); +	} + +	so_ref(so, &rsso->so); +	rsso->pipe = *cso; +	return (void *)rsso; +} + +static void +nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->rasterizer = hwcso; +	nv30->dirty |= NV30_NEW_RAST; +	/*nv30->draw_dirty |= NV30_NEW_RAST;*/ +} + +static void +nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_rasterizer_state *rsso = hwcso; + +	so_ref(NULL, &rsso->so); +	FREE(rsso); +} + +static void * +nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, +			const struct pipe_depth_stencil_alpha_state *cso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); +	struct nouveau_stateobj *so = so_new(32, 0); +	struct nouveau_grobj *rankine = nv30->screen->rankine; + +	so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); +	so_data  (so, nvgl_comparison_op(cso->depth.func)); +	so_data  (so, cso->depth.writemask ? 1 : 0); +	so_data  (so, cso->depth.enabled ? 1 : 0); + +	so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); +	so_data  (so, cso->alpha.enabled ? 1 : 0); +	so_data  (so, nvgl_comparison_op(cso->alpha.func)); +	so_data  (so, float_to_ubyte(cso->alpha.ref_value)); + +	if (cso->stencil[0].enabled) { +		so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 8); +		so_data  (so, cso->stencil[0].enabled ? 1 : 0); +		so_data  (so, cso->stencil[0].writemask); +		so_data  (so, nvgl_comparison_op(cso->stencil[0].func)); +		so_data  (so, cso->stencil[0].ref_value); +		so_data  (so, cso->stencil[0].valuemask); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); +	} else { +		so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); +		so_data  (so, 0); +	} + +	if (cso->stencil[1].enabled) { +		so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 8); +		so_data  (so, cso->stencil[1].enabled ? 
1 : 0); +		so_data  (so, cso->stencil[1].writemask); +		so_data  (so, nvgl_comparison_op(cso->stencil[1].func)); +		so_data  (so, cso->stencil[1].ref_value); +		so_data  (so, cso->stencil[1].valuemask); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); +	} else { +		so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_ref(so, &zsaso->so); +	zsaso->pipe = *cso; +	return (void *)zsaso; +} + +static void +nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->zsa = hwcso; +	nv30->dirty |= NV30_NEW_ZSA; +} + +static void +nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_zsa_state *zsaso = hwcso; + +	so_ref(NULL, &zsaso->so); +	FREE(zsaso); +} + +static void * +nv30_vp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	/*struct nv30_context *nv30 = nv30_context(pipe);*/ +	struct nv30_vertex_program *vp; + +	vp = CALLOC(1, sizeof(struct nv30_vertex_program)); +	vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); +	/*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/ + +	return (void *)vp; +} + +static void +nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->vertprog = hwcso; +	nv30->dirty |= NV30_NEW_VERTPROG; +	/*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/ +} + +static void +nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_vertex_program *vp = hwcso; + +	/*draw_delete_vertex_shader(nv30->draw, vp->draw);*/ +	nv30_vertprog_destroy(nv30, vp); +	FREE((void*)vp->pipe.tokens); +	FREE(vp); +} + +static void * +nv30_fp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv30_fragment_program *fp; + +	fp = CALLOC(1, sizeof(struct nv30_fragment_program)); +	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + +	tgsi_scan_shader(fp->pipe.tokens, &fp->info); + +	return (void *)fp; +} + +static void +nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->fragprog = hwcso; +	nv30->dirty |= NV30_NEW_FRAGPROG; +} + +static void +nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv30_fragment_program *fp = hwcso; + +	nv30_fragprog_destroy(nv30, fp); +	FREE((void*)fp->pipe.tokens); +	FREE(fp); +} + +static void +nv30_set_blend_color(struct pipe_context *pipe, +		     const struct pipe_blend_color *bcol) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->blend_colour = *bcol; +	nv30->dirty |= NV30_NEW_BCOL; +} + +static void +nv30_set_clip_state(struct pipe_context *pipe, +		    const struct pipe_clip_state *clip) +{ +} + +static void +nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +			 const struct pipe_constant_buffer *buf ) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->constbuf[shader] = buf->buffer; +	nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + +	if (shader == PIPE_SHADER_VERTEX) { +		nv30->dirty |= NV30_NEW_VERTPROG; +	} else +	if (shader == PIPE_SHADER_FRAGMENT) { +		nv30->dirty |= NV30_NEW_FRAGPROG; +	} +} + +static void +nv30_set_framebuffer_state(struct 
pipe_context *pipe, +			   const struct pipe_framebuffer_state *fb) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->framebuffer = *fb; +	nv30->dirty |= NV30_NEW_FB; +} + +static void +nv30_set_polygon_stipple(struct pipe_context *pipe, +			 const struct pipe_poly_stipple *stipple) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	memcpy(nv30->stipple, stipple->stipple, 4 * 32); +	nv30->dirty |= NV30_NEW_STIPPLE; +} + +static void +nv30_set_scissor_state(struct pipe_context *pipe, +		       const struct pipe_scissor_state *s) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->scissor = *s; +	nv30->dirty |= NV30_NEW_SCISSOR; +} + +static void +nv30_set_viewport_state(struct pipe_context *pipe, +			const struct pipe_viewport_state *vpt) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->viewport = *vpt; +	nv30->dirty |= NV30_NEW_VIEWPORT; +	/*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/ +} + +static void +nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, +			const struct pipe_vertex_buffer *vb) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count); +	nv30->vtxbuf_nr = count; + +	nv30->dirty |= NV30_NEW_ARRAYS; +	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +static void +nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, +			 const struct pipe_vertex_element *ve) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	memcpy(nv30->vtxelt, ve, sizeof(*ve) * count); +	nv30->vtxelt_nr = count; + +	nv30->dirty |= NV30_NEW_ARRAYS; +	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +static void +nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +	struct nv30_context *nv30 = nv30_context(pipe); + +	nv30->edgeflags = bitfield; +	nv30->dirty |= NV30_NEW_ARRAYS; +	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +void +nv30_init_state_functions(struct nv30_context *nv30) +{ +	nv30->pipe.create_blend_state = nv30_blend_state_create; +	nv30->pipe.bind_blend_state = nv30_blend_state_bind; +	nv30->pipe.delete_blend_state = nv30_blend_state_delete; + +	nv30->pipe.create_sampler_state = nv30_sampler_state_create; +	nv30->pipe.bind_sampler_states = nv30_sampler_state_bind; +	nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; +	nv30->pipe.set_sampler_textures = nv30_set_sampler_texture; + +	nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; +	nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; +	nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; + +	nv30->pipe.create_depth_stencil_alpha_state = +		nv30_depth_stencil_alpha_state_create; +	nv30->pipe.bind_depth_stencil_alpha_state = +		nv30_depth_stencil_alpha_state_bind; +	nv30->pipe.delete_depth_stencil_alpha_state = +		nv30_depth_stencil_alpha_state_delete; + +	nv30->pipe.create_vs_state = nv30_vp_state_create; +	nv30->pipe.bind_vs_state = nv30_vp_state_bind; +	nv30->pipe.delete_vs_state = nv30_vp_state_delete; + +	nv30->pipe.create_fs_state = nv30_fp_state_create; +	nv30->pipe.bind_fs_state = nv30_fp_state_bind; +	nv30->pipe.delete_fs_state = nv30_fp_state_delete; + +	nv30->pipe.set_blend_color = nv30_set_blend_color; +	nv30->pipe.set_clip_state = nv30_set_clip_state; +	nv30->pipe.set_constant_buffer = nv30_set_constant_buffer; +	nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state; +	nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple; +	nv30->pipe.set_scissor_state = nv30_set_scissor_state; +	nv30->pipe.set_viewport_state = 
nv30_set_viewport_state; + +	nv30->pipe.set_edgeflags = nv30_set_edgeflags; +	nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; +	nv30->pipe.set_vertex_elements = nv30_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h new file mode 100644 index 0000000000..2023278e37 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -0,0 +1,88 @@ +#ifndef __NV30_STATE_H__ +#define __NV30_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv30_sampler_state { +	uint32_t fmt; +	uint32_t wrap; +	uint32_t en; +	uint32_t filt; +	uint32_t bcol; +}; + +struct nv30_vertex_program_exec { +	uint32_t data[4]; +	boolean has_branch_offset; +	int const_index; +}; + +struct nv30_vertex_program_data { +	int index; /* immediates == -1 */ +	float value[4]; +}; + +struct nv30_vertex_program { +	struct pipe_shader_state pipe; + +	boolean translated; + +	struct nv30_vertex_program_exec *insns; +	unsigned nr_insns; +	struct nv30_vertex_program_data *consts; +	unsigned nr_consts; + +	struct nouveau_resource *exec; +	unsigned exec_start; +	struct nouveau_resource *data; +	unsigned data_start; +	unsigned data_start_min; + +	uint32_t ir; +	uint32_t or; +	struct nouveau_stateobj *so; +}; + +struct nv30_fragment_program_data { +	unsigned offset; +	unsigned index; +}; + +struct nv30_fragment_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; + +	boolean translated; +	boolean on_hw; +	unsigned samplers; + +	uint32_t *insn; +	int       insn_len; + +	struct nv30_fragment_program_data *consts; +	unsigned nr_consts; + +	struct pipe_buffer *buffer; + +	uint32_t fp_control; +	uint32_t fp_reg_control; +	struct nouveau_stateobj *so; +}; + +struct nv30_miptree { +	struct pipe_texture base; + +	struct pipe_buffer *buffer; +	uint total_size; + +	struct pipe_texture *shadow_tex; +	struct pipe_surface *shadow_surface; + +	struct { +		uint pitch; +		uint *image_offset; +	} level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c new file mode 100644 index 0000000000..44d43e132a --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -0,0 +1,40 @@ +#include "nv30_context.h" + +static boolean +nv30_state_blend_validate(struct nv30_context *nv30) +{ +	so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_blend = { +	.validate = nv30_state_blend_validate, +	.dirty = { +		.pipe = NV30_NEW_BLEND, +		.hw = NV30_STATE_BLEND +	} +}; + +static boolean +nv30_state_blend_colour_validate(struct nv30_context *nv30) +{ +	struct nouveau_stateobj *so = so_new(2, 0); +	struct pipe_blend_color *bcol = &nv30->blend_colour; + +	so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); +	so_data  (so, ((float_to_ubyte(bcol->color[3]) << 24) | +		       (float_to_ubyte(bcol->color[0]) << 16) | +		       (float_to_ubyte(bcol->color[1]) <<  8) | +		       (float_to_ubyte(bcol->color[2]) <<  0))); + +	so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_blend_colour = { +	.validate = nv30_state_blend_colour_validate, +	.dirty = { +		.pipe = NV30_NEW_BCOL, +		.hw = NV30_STATE_BCOL +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c new file mode 100644 index 0000000000..f77b08ff69 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -0,0 +1,118 @@ +#include 
"nv30_context.h" +#include "nv30_state.h" + +static struct nv30_state_entry *render_states[] = { +	&nv30_state_framebuffer, +	&nv30_state_rasterizer, +	&nv30_state_scissor, +	&nv30_state_stipple, +	&nv30_state_fragprog, +	&nv30_state_fragtex, +	&nv30_state_vertprog, +	&nv30_state_blend, +	&nv30_state_blend_colour, +	&nv30_state_zsa, +	&nv30_state_viewport, +	&nv30_state_vbo, +	NULL +}; + +static void +nv30_state_do_validate(struct nv30_context *nv30, +		       struct nv30_state_entry **states) +{ +	const struct pipe_framebuffer_state *fb = &nv30->framebuffer; +	unsigned i; + +	for (i = 0; i < fb->nr_cbufs; i++) +		fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; +	if (fb->zsbuf) +		fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + +	while (*states) { +		struct nv30_state_entry *e = *states; + +		if (nv30->dirty & e->dirty.pipe) { +			if (e->validate(nv30)) { +				nv30->state.dirty |= (1ULL << e->dirty.hw); +			} +		} + +		states++; +	} +	nv30->dirty = 0; +} + +void +nv30_state_emit(struct nv30_context *nv30) +{ +	struct nv30_state *state = &nv30->state; +	struct nv30_screen *screen = nv30->screen; +	unsigned i, samplers; +	uint64_t states; + +	if (nv30->pctx_id != screen->cur_pctx) { +		for (i = 0; i < NV30_STATE_MAX; i++) { +			if (state->hw[i] && screen->state[i] != state->hw[i]) +				state->dirty |= (1ULL << i); +		} + +		screen->cur_pctx = nv30->pctx_id; +	} + +	for (i = 0, states = state->dirty; states; i++) { +		if (!(states & (1ULL << i))) +			continue; +		so_ref (state->hw[i], &nv30->screen->state[i]); +		if (state->hw[i]) +			so_emit(nv30->nvws, nv30->screen->state[i]); +		states &= ~(1ULL << i); +	} + +	state->dirty = 0; + +	so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); +	for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { +		if (!(samplers & (1 << i))) +			continue; +		so_emit_reloc_markers(nv30->nvws, +				      state->hw[NV30_STATE_FRAGTEX0+i]); +		samplers &= ~(1ULL << i); +	} +	so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]); +	if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/) +		so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_VTXBUF]); +} + +boolean +nv30_state_validate(struct nv30_context *nv30) +{ +#if 0 +	boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE; + +	if (nv30->render_mode != HW) { +		/* Don't even bother trying to go back to hw if none +		 * of the states that caused swtnl previously have changed. 
+		 */ +		if ((nv30->fallback_swtnl & nv30->dirty) +				!= nv30->fallback_swtnl) +			return FALSE; + +		/* Attempt to go to hwtnl again */ +		nv30->pipe.flush(&nv30->pipe, 0, NULL); +		nv30->dirty |= (NV30_NEW_VIEWPORT | +				NV30_NEW_VERTPROG | +				NV30_NEW_ARRAYS); +		nv30->render_mode = HW; +	} +#endif +	nv30_state_do_validate(nv30, render_states); +#if 0 +	if (nv30->fallback_swtnl || nv30->fallback_swrast) +		return FALSE; +	 +	if (was_sw) +		NOUVEAU_ERR("swtnl->hw\n"); +#endif +	return TRUE; +} diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c new file mode 100644 index 0000000000..77368cb205 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -0,0 +1,144 @@ +#include "nv30_context.h" +#include "nouveau/nouveau_util.h" + +static boolean +nv30_state_framebuffer_validate(struct nv30_context *nv30) +{ +	struct pipe_framebuffer_state *fb = &nv30->framebuffer; +	struct pipe_surface *rt[2], *zeta = NULL; +	uint32_t rt_enable, rt_format; +	int i, colour_format = 0, zeta_format = 0; +	struct nouveau_stateobj *so = so_new(64, 10); +	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; +	unsigned w = fb->width; +	unsigned h = fb->height; +	struct nv30_miptree *nv30mt; + +	rt_enable = 0; +	for (i = 0; i < fb->nr_cbufs; i++) { +		if (colour_format) { +			assert(colour_format == fb->cbufs[i]->format); +		} else { +			colour_format = fb->cbufs[i]->format; +			rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); +			rt[i] = fb->cbufs[i]; +		} +	} + +	if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) +		rt_enable |= NV34TCL_RT_ENABLE_MRT; + +	if (fb->zsbuf) { +		zeta_format = fb->zsbuf->format; +		zeta = fb->zsbuf; +	} + +	if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); +		for (i = 1; i < fb->nr_cbufs; i++) +			assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + +		/* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */ +		rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | +		log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ | +		log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/; +	} +	else +		rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + +	switch (colour_format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case 0: +		rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; +		break; +	case PIPE_FORMAT_R5G6B5_UNORM: +		rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; +		break; +	default: +		assert(0); +	} + +	switch (zeta_format) { +	case PIPE_FORMAT_Z16_UNORM: +		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; +		break; +	case PIPE_FORMAT_Z24S8_UNORM: +	case 0: +		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; +		break; +	default: +		assert(0); +	} + +	if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { +		uint32_t pitch = rt[0]->stride; +		if (zeta) { +			pitch |= (zeta->stride << 16); +		} else { +			pitch |= (pitch << 16); +		} + +		nv30mt = (struct nv30_miptree *)rt[0]->texture; +		so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1); +		so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, +			  nv30->nvws->channel->vram->handle, +			  nv30->nvws->channel->gart->handle); +		so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2); +		so_data  (so, pitch); +		so_reloc (so, nv30mt->buffer, rt[0]->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +	} + +	if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { +		nv30mt = (struct nv30_miptree *)rt[1]->texture; +		so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1); +		so_reloc (so, nv30mt->buffer, 0, 
rt_flags | NOUVEAU_BO_OR, +			  nv30->nvws->channel->vram->handle, +			  nv30->nvws->channel->gart->handle); +		so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2); +		so_reloc (so, nv30mt->buffer, rt[1]->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_data  (so, rt[1]->stride); +	} + +	if (zeta_format) { +		nv30mt = (struct nv30_miptree *)zeta->texture; +		so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1); +		so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, +			  nv30->nvws->channel->vram->handle, +			  nv30->nvws->channel->gart->handle); +		so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1); +		so_reloc (so, nv30mt->buffer, zeta->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		/* TODO: allocate LMA depth buffer */ +	} + +	so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1); +	so_data  (so, rt_enable); +	so_method(so, nv30->screen->rankine, NV34TCL_RT_HORIZ, 3); +	so_data  (so, (w << 16) | 0); +	so_data  (so, (h << 16) | 0); +	so_data  (so, rt_format); +	so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_HORIZ, 2); +	so_data  (so, (w << 16) | 0); +	so_data  (so, (h << 16) | 0); +	so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); +	so_data  (so, ((w - 1) << 16) | 0); +	so_data  (so, ((h - 1) << 16) | 0); +	so_method(so, nv30->screen->rankine, 0x1d88, 1); +	so_data  (so, (1 << 12) | h); +	/* Wonder why this is needed, context should all be set to zero on init */ +	so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); +	so_data  (so, 0); + +	so_ref(so, &nv30->state.hw[NV30_STATE_FB]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_framebuffer = { +	.validate = nv30_state_framebuffer_validate, +	.dirty = { +		.pipe = NV30_NEW_FB, +		.hw = NV30_STATE_FB +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c new file mode 100644 index 0000000000..6d1b60e043 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_rasterizer_validate(struct nv30_context *nv30) +{ +	so_ref(nv30->rasterizer->so, +	       &nv30->state.hw[NV30_STATE_RAST]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_rasterizer = { +	.validate = nv30_state_rasterizer_validate, +	.dirty = { +		.pipe = NV30_NEW_RAST, +		.hw = NV30_STATE_RAST +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c new file mode 100644 index 0000000000..1db9bc1795 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv30_context.h" + +static boolean +nv30_state_scissor_validate(struct nv30_context *nv30) +{ +	struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; +	struct pipe_scissor_state *s = &nv30->scissor; +	struct nouveau_stateobj *so; + +	if (nv30->state.hw[NV30_STATE_SCISSOR] && +	    (rast->scissor == 0 && nv30->state.scissor_enabled == 0)) +		return FALSE; +	nv30->state.scissor_enabled = rast->scissor; + +	so = so_new(3, 0); +	so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); +	if (nv30->state.scissor_enabled) { +		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx); +		so_data  (so, ((s->maxy - s->miny) << 16) | s->miny); +	} else { +		so_data  (so, 4096 << 16); +		so_data  (so, 4096 << 16); +	} + +	so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_scissor = { +	.validate = nv30_state_scissor_validate, +	
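	/* Re-run the validate above when either the scissor rectangle or the
	 * rasterizer changes, since the rasterizer CSO carries the scissor
	 * enable bit that the validate consults. */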
.dirty = { +		.pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST, +		.hw = NV30_STATE_SCISSOR +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c new file mode 100644 index 0000000000..41b42813b4 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv30_context.h" + +static boolean +nv30_state_stipple_validate(struct nv30_context *nv30) +{ +	struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; +	struct nouveau_grobj *rankine = nv30->screen->rankine; +	struct nouveau_stateobj *so; + +	if (nv30->state.hw[NV30_STATE_STIPPLE] && +	   (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0)) +		return FALSE; + +	if (rast->poly_stipple_enable) { +		unsigned i; + +		so = so_new(35, 0); +		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); +		so_data  (so, 1); +		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); +		for (i = 0; i < 32; i++) +			so_data(so, nv30->stipple[i]); +	} else { +		so = so_new(2, 0); +		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_stipple = { +	.validate = nv30_state_stipple_validate, +	.dirty = { +		.pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST, +		.hw = NV30_STATE_STIPPLE, +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c new file mode 100644 index 0000000000..951d40ebfd --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -0,0 +1,70 @@ +#include "nv30_context.h" + +static boolean +nv30_state_viewport_validate(struct nv30_context *nv30) +{ +	struct pipe_viewport_state *vpt = &nv30->viewport; +	struct nouveau_stateobj *so; +	unsigned bypass; + +	if (/*nv30->render_mode == HW &&*/ !nv30->rasterizer->pipe.bypass_clipping) +		bypass = 0; +	else +		bypass = 1; + +	if (nv30->state.hw[NV30_STATE_VIEWPORT] && +	    (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) && +	    nv30->state.viewport_bypass == bypass) +		return FALSE; +	nv30->state.viewport_bypass = bypass; + +	so = so_new(11, 0); +	if (!bypass) { +		so_method(so, nv30->screen->rankine, +			  NV34TCL_VIEWPORT_TRANSLATE_X, 8); +		so_data  (so, fui(vpt->translate[0])); +		so_data  (so, fui(vpt->translate[1])); +		so_data  (so, fui(vpt->translate[2])); +		so_data  (so, fui(vpt->translate[3])); +		so_data  (so, fui(vpt->scale[0])); +		so_data  (so, fui(vpt->scale[1])); +		so_data  (so, fui(vpt->scale[2])); +		so_data  (so, fui(vpt->scale[3])); +/*		so_method(so, nv30->screen->rankine, 0x1d78, 1); +		so_data  (so, 1); +*/	} else { +		so_method(so, nv30->screen->rankine, +			  NV34TCL_VIEWPORT_TRANSLATE_X, 8); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(1.0)); +		so_data  (so, fui(1.0)); +		so_data  (so, fui(1.0)); +		so_data  (so, fui(0.0)); +		/* Not entirely certain what this is yet.  The DDX uses this +		 * value also as it fixes rendering when you pass +		 * pre-transformed vertices to the GPU.  My best gusss is that +		 * this bypasses some culling/clipping stage.  Might be worth +		 * noting that points/lines are uneffected by whatever this +		 * value fixes, only filled polygons are effected. 
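		 * Note that this path currently leaves the commented 0x0110
		 * write disabled and relies on the unconditional 0x1d78 = 1
		 * write just below, per the TODO about the renouveau dumps.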
+		 */ +/*		so_method(so, nv30->screen->rankine, 0x1d78, 1); +		so_data  (so, 0x110); +*/	} +	/* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ +	so_method(so, nv30->screen->rankine, 0x1d78, 1); +	so_data  (so, 1); + +	so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_viewport = { +	.validate = nv30_state_viewport_validate, +	.dirty = { +		.pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST, +		.hw = NV30_STATE_VIEWPORT +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c new file mode 100644 index 0000000000..0940b7269b --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_zsa_validate(struct nv30_context *nv30) +{ +	so_ref(nv30->zsa->so, +	       &nv30->state.hw[NV30_STATE_ZSA]); +	return TRUE; +} + +struct nv30_state_entry nv30_state_zsa = { +	.validate = nv30_state_zsa_validate, +	.dirty = { +		.pipe = NV30_NEW_ZSA, +		.hw = NV30_STATE_ZSA +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c new file mode 100644 index 0000000000..0f8dc12045 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#include "nv30_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv30_surface_copy(struct pipe_context *pipe, boolean do_flip, +		  struct pipe_surface *dest, unsigned destx, unsigned desty, +		  struct pipe_surface *src, unsigned srcx, unsigned srcy, +		  unsigned width, unsigned height) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + +	if (do_flip) { +		desty += height; +		while (height--) { +			eng2d->copy(eng2d, dest, destx, desty--, src, +				    srcx, srcy++, width, 1); +		} +		return; +	} + +	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +		  unsigned destx, unsigned desty, unsigned width, +		  unsigned height, unsigned value) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + +	eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv30_init_surface_functions(struct nv30_context *nv30) +{ +	nv30->pipe.surface_copy = nv30_surface_copy; +	nv30->pipe.surface_fill = nv30_surface_fill; +} diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c new file mode 100644 index 0000000000..2d6d48ac16 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -0,0 +1,556 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ +	switch (pipe) { +	case PIPE_FORMAT_R32_FLOAT: +	case PIPE_FORMAT_R32G32_FLOAT: +	case PIPE_FORMAT_R32G32B32_FLOAT: +	case PIPE_FORMAT_R32G32B32A32_FLOAT: +		*fmt = NV34TCL_VTXFMT_TYPE_FLOAT; +		break; +	case PIPE_FORMAT_R8_UNORM: +	case PIPE_FORMAT_R8G8_UNORM: +	case PIPE_FORMAT_R8G8B8_UNORM: +	case PIPE_FORMAT_R8G8B8A8_UNORM: +		*fmt = NV34TCL_VTXFMT_TYPE_UBYTE; +		break; +	case PIPE_FORMAT_R16_SSCALED: +	case PIPE_FORMAT_R16G16_SSCALED: +	case PIPE_FORMAT_R16G16B16_SSCALED: +	case PIPE_FORMAT_R16G16B16A16_SSCALED: +		*fmt = NV34TCL_VTXFMT_TYPE_USHORT; +		break; +	default: +		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); +		return 1; +	} + +	switch (pipe) { +	case PIPE_FORMAT_R8_UNORM: +	case PIPE_FORMAT_R32_FLOAT: +	case PIPE_FORMAT_R16_SSCALED: +		*ncomp = 1; +		break; +	case PIPE_FORMAT_R8G8_UNORM: +	case PIPE_FORMAT_R32G32_FLOAT: +	case PIPE_FORMAT_R16G16_SSCALED: +		*ncomp = 2; +		break; +	case PIPE_FORMAT_R8G8B8_UNORM: +	case PIPE_FORMAT_R32G32B32_FLOAT: +	case PIPE_FORMAT_R16G16B16_SSCALED: +		*ncomp = 3; +		break; +	case PIPE_FORMAT_R8G8B8A8_UNORM: +	case PIPE_FORMAT_R32G32B32A32_FLOAT: +	case PIPE_FORMAT_R16G16B16A16_SSCALED: +		*ncomp = 4; +		break; +	default: +		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); +		return 1; +	} + +	return 0; +} + +static boolean +nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, +		    unsigned ib_size) +{ +	struct pipe_screen *pscreen = &nv30->screen->pipe; +	unsigned type; + +	if (!ib) { +		nv30->idxbuf = NULL; +		nv30->idxbuf_format = 0xdeadbeef; +		return FALSE; +	} + +	if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size 
== 1) +		return FALSE; + +	switch (ib_size) { +	case 2: +		type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; +		break; +	case 4: +		type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; +		break; +	default: +		return FALSE; +	} + +	if (ib != nv30->idxbuf || +	    type != nv30->idxbuf_format) { +		nv30->dirty |= NV30_NEW_ARRAYS; +		nv30->idxbuf = ib; +		nv30->idxbuf_format = type; +	} + +	return TRUE; +} + +static boolean +nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, +		       int attrib, struct pipe_vertex_element *ve, +		       struct pipe_vertex_buffer *vb) +{ +	struct pipe_winsys *ws = nv30->pipe.winsys; +	struct nouveau_grobj *rankine = nv30->screen->rankine; +	unsigned type, ncomp; +	void *map; + +	if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) +		return FALSE; + +	map  = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); +	map += vb->buffer_offset + ve->src_offset; + +	switch (type) { +	case NV34TCL_VTXFMT_TYPE_FLOAT: +	{ +		float *v = map; + +		switch (ncomp) { +		case 4: +			so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			so_data  (so, fui(v[2])); +			so_data  (so, fui(v[3])); +			break; +		case 3: +			so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			so_data  (so, fui(v[2])); +			break; +		case 2: +			so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			break; +		case 1: +			so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1); +			so_data  (so, fui(v[0])); +			break; +		default: +			ws->buffer_unmap(ws, vb->buffer); +			return FALSE; +		} +	} +		break; +	default: +		ws->buffer_unmap(ws, vb->buffer); +		return FALSE; +	} + +	ws->buffer_unmap(ws, vb->buffer); + +	return TRUE; +} + +boolean +nv30_draw_arrays(struct pipe_context *pipe, +		 unsigned mode, unsigned start, unsigned count) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nouveau_channel *chan = nv30->nvws->channel; +	unsigned restart = 0; + +	nv30_vbo_set_idxbuf(nv30, NULL, 0); +	if (FORCE_SWTNL || !nv30_state_validate(nv30)) { +		/*return nv30_draw_elements_swtnl(pipe, NULL, 0, +						mode, start, count);*/ +		return FALSE; +	} + +	while (count) { +		unsigned vc, nr; + +		nv30_state_emit(nv30); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, +					mode, start, count, &restart); +		if (!vc) { +			FIRE_RING(NULL); +			continue; +		} + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		nr = (vc & 0xff); +		if (nr) { +			BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1); +			OUT_RING  (((nr - 1) << 24) | start); +			start += nr; +		} + +		nr = vc >> 8; +		while (nr) { +			unsigned push = nr > 2047 ? 
2047 : nr; + +			nr -= push; + +			BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push); +			while (push--) { +				OUT_RING(((0x100 - 1) << 24) | start); +				start += 0x100; +			} +		} + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (0); + +		count -= vc; +		start = restart; +	} + +	pipe->flush(pipe, 0, NULL); +	return TRUE; +} + +static INLINE void +nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv30->nvws->channel; + +	while (count) { +		uint8_t *elts = (uint8_t *)ib + start; +		unsigned vc, push, restart = 0; + +		nv30_state_emit(nv30); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		if (vc & 1) { +			BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); +			OUT_RING  (elts[0]); +			elts++; vc--; +		} + +		while (vc) { +			unsigned i; + +			push = MIN2(vc, 2047 * 2); + +			BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); +			for (i = 0; i < push; i+=2) +				OUT_RING((elts[i+1] << 16) | elts[i]); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; +	} +} + +static INLINE void +nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv30->nvws->channel; + +	while (count) { +		uint16_t *elts = (uint16_t *)ib + start; +		unsigned vc, push, restart = 0; + +		nv30_state_emit(nv30); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		if (vc & 1) { +			BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); +			OUT_RING  (elts[0]); +			elts++; vc--; +		} + +		while (vc) { +			unsigned i; + +			push = MIN2(vc, 2047 * 2); + +			BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); +			for (i = 0; i < push; i+=2) +				OUT_RING((elts[i+1] << 16) | elts[i]); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; +	} +} + +static INLINE void +nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv30->nvws->channel; + +	while (count) { +		uint32_t *elts = (uint32_t *)ib + start; +		unsigned vc, push, restart = 0; + +		nv30_state_emit(nv30); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		while (vc) { +			push = MIN2(vc, 2047); + +			BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); +			OUT_RINGp    (elts, push); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; +	} +} + +static boolean +nv30_draw_elements_inline(struct pipe_context *pipe, +			  struct pipe_buffer *ib, unsigned ib_size, +			  unsigned mode, unsigned start, unsigned count) +{ +	struct nv30_context *nv30 = 
nv30_context(pipe); +	struct pipe_winsys *ws = pipe->winsys; +	void *map; + +	map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); +	if (!ib) { +		NOUVEAU_ERR("failed mapping ib\n"); +		return FALSE; +	} + +	switch (ib_size) { +	case 1: +		nv30_draw_elements_u08(nv30, map, mode, start, count); +		break; +	case 2: +		nv30_draw_elements_u16(nv30, map, mode, start, count); +		break; +	case 4: +		nv30_draw_elements_u32(nv30, map, mode, start, count); +		break; +	default: +		NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); +		break; +	} + +	ws->buffer_unmap(ws, ib); +	return TRUE; +} + +static boolean +nv30_draw_elements_vbo(struct pipe_context *pipe, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	struct nouveau_channel *chan = nv30->nvws->channel; +	unsigned restart = 0; + +	while (count) { +		unsigned nr, vc; + +		nv30_state_emit(nv30); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, +					mode, start, count, &restart); +		if (!vc) { +			FIRE_RING(NULL); +			continue; +		} +		 +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		nr = (vc & 0xff); +		if (nr) { +			BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); +			OUT_RING  (((nr - 1) << 24) | start); +			start += nr; +		} + +		nr = vc >> 8; +		while (nr) { +			unsigned push = nr > 2047 ? 2047 : nr; + +			nr -= push; + +			BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); +			while (push--) { +				OUT_RING(((0x100 - 1) << 24) | start); +				start += 0x100; +			} +		} + +		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING  (0); + +		count -= vc; +		start = restart; +	} + +	return TRUE; +} + +boolean +nv30_draw_elements(struct pipe_context *pipe, +		   struct pipe_buffer *indexBuffer, unsigned indexSize, +		   unsigned mode, unsigned start, unsigned count) +{ +	struct nv30_context *nv30 = nv30_context(pipe); +	boolean idxbuf; + +	idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize); +	if (FORCE_SWTNL || !nv30_state_validate(nv30)) { +		/*return nv30_draw_elements_swtnl(pipe, NULL, 0, +						mode, start, count);*/ +		return FALSE;	 +	} + +	if (idxbuf) { +		nv30_draw_elements_vbo(pipe, mode, start, count); +	} else { +		nv30_draw_elements_inline(pipe, indexBuffer, indexSize, +					  mode, start, count); +	} + +	pipe->flush(pipe, 0, NULL); +	return TRUE; +} + +static boolean +nv30_vbo_validate(struct nv30_context *nv30) +{ +	struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; +	struct nouveau_grobj *rankine = nv30->screen->rankine; +	struct pipe_buffer *ib = nv30->idxbuf; +	unsigned ib_format = nv30->idxbuf_format; +	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; +	int hw; + +	if (nv30->edgeflags) { +		/*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ +		return FALSE; +	} + +	vtxbuf = so_new(20, 18); +	so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); +	vtxfmt = so_new(17, 0); +	so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); + +	for (hw = 0; hw < nv30->vtxelt_nr; hw++) { +		struct pipe_vertex_element *ve; +		struct pipe_vertex_buffer *vb; +		unsigned type, ncomp; + +		ve = &nv30->vtxelt[hw]; +		vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + +		if (!vb->stride) { +			if (!sattr) +				sattr = so_new(16 * 5, 0); + +			if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { +				so_data(vtxbuf, 0); +				so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); +				continue; +			} +		} + +		if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { +			
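+			/* A non-zero return here appears to mean the pipe vertex
+			 * format has no hardware fetch encoding; the partially
+			 * built state objects are dropped and validation fails
+			 * (the swtnl fallback below is still commented out).
+			 */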
/*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ +			so_ref(NULL, &vtxbuf); +			so_ref(NULL, &vtxfmt); +			return FALSE; +		} + +		so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, +			 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, +			 0, NV34TCL_VTXBUF_ADDRESS_DMA1); +		so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | +				  (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); +	} + +	if (ib) { +		so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2); +		so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); +		so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, +			  0, NV34TCL_IDXBUF_FORMAT_DMA1); +	} + +	so_method(vtxbuf, rankine, 0x1710, 1); +	so_data  (vtxbuf, 0); + +	so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]); +	nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF); +	so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]); +	nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT); +	so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]); +	nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR); +	return FALSE; +} + +struct nv30_state_entry nv30_state_vbo = { +	.validate = nv30_vbo_validate, +	.dirty = { +		.pipe = NV30_NEW_ARRAYS, +		.hw = 0, +	} +}; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c new file mode 100644 index 0000000000..d262725057 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -0,0 +1,838 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +/* TODO (at least...): + *  1. Indexed consts  + ARL + *  2. Arb. swz/negation + *  3. NV_vp11, NV_vp2, NV_vp3 features + *       - extra arith opcodes + *       - branching + *       - texture sampling + *       - indexed attribs + *       - indexed results + *  4. 
bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) + +struct nv30_vpc { +	struct nv30_vertex_program *vp; + +	struct nv30_vertex_program_exec *vpi; + +	unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + +	int high_temp; +	int temp_temp_count; + +	struct nv30_sreg *imm; +	unsigned nr_imm; +}; + +static struct nv30_sreg +temp(struct nv30_vpc *vpc) +{ +	int idx; + +	idx  = vpc->temp_temp_count++; +	idx += vpc->high_temp + 1; +	return nv30_sr(NV30SR_TEMP, idx); +} + +static struct nv30_sreg +constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) +{ +	struct nv30_vertex_program *vp = vpc->vp; +	struct nv30_vertex_program_data *vpd; +	int idx; + +	if (pipe >= 0) { +		for (idx = 0; idx < vp->nr_consts; idx++) { +			if (vp->consts[idx].index == pipe) +				return nv30_sr(NV30SR_CONST, idx); +		} +	} + +	idx = vp->nr_consts++; +	vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); +	vpd = &vp->consts[idx]; + +	vpd->index = pipe; +	vpd->value[0] = x; +	vpd->value[1] = y; +	vpd->value[2] = z; +	vpd->value[3] = w; +	return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ +	nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) +{ +	struct nv30_vertex_program *vp = vpc->vp; +	uint32_t sr = 0; + +	switch (src.type) { +	case NV30SR_TEMP: +		sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); +		sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); +		break; +	case NV30SR_INPUT: +		sr |= (NV30_VP_SRC_REG_TYPE_INPUT << +		       NV30_VP_SRC_REG_TYPE_SHIFT); +		vp->ir |= (1 << src.index); +		hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); +		break; +	case NV30SR_CONST: +		sr |= (NV30_VP_SRC_REG_TYPE_CONST << +		       NV30_VP_SRC_REG_TYPE_SHIFT); +		assert(vpc->vpi->const_index == -1 || +		       vpc->vpi->const_index == src.index); +		vpc->vpi->const_index = src.index; +		break; +	case NV30SR_NONE: +		sr |= (NV30_VP_SRC_REG_TYPE_INPUT << +		       NV30_VP_SRC_REG_TYPE_SHIFT); +		break; +	default: +		assert(0); +	} + +	if (src.negate) +		sr |= NV30_VP_SRC_NEGATE; + +	if (src.abs) +		hw[0] |= (1 << (21 + pos)); + +	sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | +	       (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | +	       (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | +	       (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); + +/* + * |VVV| + * d�.�b + *  \u/ + * + */ + +	switch (pos) { +	case 0: +		hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> +			  NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; +		hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << +			  NV30_VP_INST_SRC0L_SHIFT; +		break; +	case 1: +		hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; +		break; +	case 2: +		hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> +			  NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; +		hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << +			  NV30_VP_INST_SRC2L_SHIFT; +		break; +	default: +		assert(0); +	} +} + +static void +emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) +{ +	struct nv30_vertex_program *vp = vpc->vp; + +	switch (dst.type) { +	case NV30SR_TEMP: +		hw[0] |= (dst.index << 
NV30_VP_INST_DEST_TEMP_ID_SHIFT); +		break; +	case NV30SR_OUTPUT: +		switch (dst.index) { +		case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; +		case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; +		case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; +		case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; +		case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; +		case NV30_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break; +		case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; +		case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; +		case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; +		case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; +		case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; +		case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; +		case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; +		case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; +		default: +			break; +		} + +		hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); +		hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + +		/*XXX: no way this is entirely correct, someone needs to +		 *     figure out what exactly it is. +		 */ +		hw[3] |= 0x800; +		break; +	default: +		assert(0); +	} +} + +static void +nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, +	      struct nv30_sreg dst, int mask, +	      struct nv30_sreg s0, struct nv30_sreg s1, +	      struct nv30_sreg s2) +{ +	struct nv30_vertex_program *vp = vpc->vp; +	uint32_t *hw; + +	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); +	vpc->vpi = &vp->insns[vp->nr_insns - 1]; +	memset(vpc->vpi, 0, sizeof(*vpc->vpi)); +	vpc->vpi->const_index = -1; + +	hw = vpc->vpi->data; + +	hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); +	hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | +		  (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | +		  (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | +		  (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); + +	hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +//	hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; +//	hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + +	if (dst.type == NV30SR_OUTPUT) { +		if (slot) +			hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); +		else +			hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); +	} else { +		if (slot) +			hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); +		else +			hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); +	} + +	emit_dst(vpc, hw, slot, dst); +	emit_src(vpc, hw, 0, s0); +	emit_src(vpc, hw, 1, s1); +	emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { +	struct nv30_sreg src; + +	switch (fsrc->SrcRegister.File) { +	case TGSI_FILE_INPUT: +		src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); +		break; +	case TGSI_FILE_CONSTANT: +		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); +		break; +	case TGSI_FILE_IMMEDIATE: +		src = vpc->imm[fsrc->SrcRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		if (vpc->high_temp < fsrc->SrcRegister.Index) +			vpc->high_temp = fsrc->SrcRegister.Index; +		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); +		break; +	default: +		NOUVEAU_ERR("bad src file\n"); +		break; +	} + +	src.abs = fsrc->SrcRegisterExtMod.Absolute; +	src.negate = fsrc->SrcRegister.Negate; +	src.swz[0] = fsrc->SrcRegister.SwizzleX; +	src.swz[1] = fsrc->SrcRegister.SwizzleY; +	src.swz[2] = fsrc->SrcRegister.SwizzleZ; +	src.swz[3] = fsrc->SrcRegister.SwizzleW; +	return src; +} + +static INLINE 
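+/* Presumably the destination-side counterpart of tgsi_src(): OUTPUT
+ * registers are remapped through vpc->output_map, which the output
+ * declaration parser fills in, while TEMPORARY writes bump high_temp so
+ * that temp() can hand out scratch registers above the program's own
+ * temporaries.
+ */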
struct nv30_sreg +tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { +	struct nv30_sreg dst; + +	switch (fdst->DstRegister.File) { +	case TGSI_FILE_OUTPUT: +		dst = nv30_sr(NV30SR_OUTPUT, +			      vpc->output_map[fdst->DstRegister.Index]); + +		break; +	case TGSI_FILE_TEMPORARY: +		dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); +		if (vpc->high_temp < dst.index) +			vpc->high_temp = dst.index; +		break; +	default: +		NOUVEAU_ERR("bad dst file\n"); +		break; +	} + +	return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ +	int mask = 0; + +	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; +	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; +	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; +	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; +	return mask; +} + +static boolean +nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, +				const struct tgsi_full_instruction *finst) +{ +	struct nv30_sreg src[3], dst, tmp; +	struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); +	int mask; +	int ai = -1, ci = -1; +	int i; + +	if (finst->Instruction.Opcode == TGSI_OPCODE_END) +		return TRUE; + +	vpc->temp_temp_count = 0; +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { +			src[i] = tgsi_src(vpc, fsrc); +		} +	} + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			if (ai == -1 || ai == fsrc->SrcRegister.Index) { +				ai = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		/*XXX: index comparison is broken now that consts come from +		 *     two different register files. 
+		 */ +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_IMMEDIATE: +			if (ci == -1 || ci == fsrc->SrcRegister.Index) { +				ci = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_TEMPORARY: +			/* handled above */ +			break; +		default: +			NOUVEAU_ERR("bad src file\n"); +			return FALSE; +		} +	} + +	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]); +	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + +	switch (finst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); +		break; +	case TGSI_OPCODE_ADD: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); +		break; +	case TGSI_OPCODE_ARL: +		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_DP3: +		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DP4: +		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DPH: +		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DST: +		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_EX2: +		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_EXP: +		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_FLR: +		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FRC: +		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_LG2: +		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LIT: +		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LOG: +		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_MAD: +		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); +		break; +	case TGSI_OPCODE_MAX: +		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MIN: +		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MOV: +		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_MUL: +		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_POW: +		tmp = temp(vpc); +		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, +		      swz(src[0], X, X, X, X)); +		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		      swz(src[1], X, X, X, X), none); +		arith(vpc, 1, OP_EX2, dst, mask, none, none, +		      swz(tmp, X, X, X, X)); +		break; +	case TGSI_OPCODE_RCP: +		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_RET: +		break; +	case TGSI_OPCODE_RSQ: +		arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_SGE: +		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SGT: +		arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SLT: +		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SUB: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); +		break; +	case TGSI_OPCODE_XPD: +		tmp = temp(vpc); +		arith(vpc, 0, OP_MUL, tmp, mask, +		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); +		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), +		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, 
Y), +		      neg(tmp)); +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); +		return FALSE; +	} + +	return TRUE; +} + +static boolean +nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, +				const struct tgsi_full_declaration *fdec) +{ +	int hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = NV30_VP_INST_DEST_POS; +		break; +	case TGSI_SEMANTIC_COLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV30_VP_INST_DEST_COL0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV30_VP_INST_DEST_COL1; +		} else { +			NOUVEAU_ERR("bad colour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_BCOLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV30_VP_INST_DEST_BFC0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV30_VP_INST_DEST_BFC1; +		} else { +			NOUVEAU_ERR("bad bcolour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_FOG: +		hw = NV30_VP_INST_DEST_FOGC; +		break; +	case TGSI_SEMANTIC_PSIZE: +		hw = NV30_VP_INST_DEST_PSZ; +		break; +	case TGSI_SEMANTIC_GENERIC: +		if (fdec->Semantic.SemanticIndex <= 7) { +			hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); +		} else { +			NOUVEAU_ERR("bad generic semantic index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad output semantic\n"); +		return FALSE; +	} + +	vpc->output_map[fdec->DeclarationRange.First] = hw; +	return TRUE; +} + +static boolean +nv30_vertprog_prepare(struct nv30_vpc *vpc) +{ +	struct tgsi_parse_context p; +	int nr_imm = 0; + +	tgsi_parse_init(&p, vpc->vp->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch(tok->Token.Type) { +		case TGSI_TOKEN_TYPE_IMMEDIATE: +			nr_imm++; +			break; +		default: +			break; +		} +	} +	tgsi_parse_free(&p); + +	if (nr_imm) { +		vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); +		assert(vpc->imm); +	} + +	return TRUE; +} + +static void +nv30_vertprog_translate(struct nv30_context *nv30, +			struct nv30_vertex_program *vp) +{ +	struct tgsi_parse_context parse; +	struct nv30_vpc *vpc = NULL; + +	tgsi_dump(vp->pipe.tokens,0); + +	vpc = CALLOC(1, sizeof(struct nv30_vpc)); +	if (!vpc) +		return; +	vpc->vp = vp; +	vpc->high_temp = -1; + +	if (!nv30_vertprog_prepare(vpc)) { +		FREE(vpc); +		return; +	} + +	tgsi_parse_init(&parse, vp->pipe.tokens); + +	while (!tgsi_parse_end_of_tokens(&parse)) { +		tgsi_parse_token(&parse); + +		switch (parse.FullToken.Token.Type) { +		case TGSI_TOKEN_TYPE_DECLARATION: +		{ +			const struct tgsi_full_declaration *fdec; +			fdec = &parse.FullToken.FullDeclaration; +			switch (fdec->Declaration.File) { +			case TGSI_FILE_OUTPUT: +				if (!nv30_vertprog_parse_decl_output(vpc, fdec)) +					goto out_err; +				break; +			default: +				break; +			} +		} +			break; +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			const struct tgsi_full_immediate *imm; + +			imm = &parse.FullToken.FullImmediate; +			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +			assert(imm->Immediate.NrTokens == 4 + 1); +			vpc->imm[vpc->nr_imm++] = +				constant(vpc, -1, +					 imm->u.ImmediateFloat32[0].Float, +					 imm->u.ImmediateFloat32[1].Float, +					 imm->u.ImmediateFloat32[2].Float, +					 imm->u.ImmediateFloat32[3].Float); +		} +			break; +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; +			finst = &parse.FullToken.FullInstruction; +			if (!nv30_vertprog_parse_instruction(vpc, 
finst)) +				goto out_err; +		} +			break; +		default: +			break; +		} +	} + +	vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; +	vp->translated = TRUE; +out_err: +	tgsi_parse_free(&parse); +	FREE(vpc); +} + +static boolean +nv30_vertprog_validate(struct nv30_context *nv30) +{  +	struct nouveau_winsys *nvws = nv30->nvws; +	struct pipe_winsys *ws = nv30->pipe.winsys; +	struct nouveau_grobj *rankine = nv30->screen->rankine; +	struct nv30_vertex_program *vp; +	struct pipe_buffer *constbuf; +	boolean upload_code = FALSE, upload_data = FALSE; +	int i; + +	vp = nv30->vertprog; +	constbuf = nv30->constbuf[PIPE_SHADER_VERTEX]; + +	/* Translate TGSI shader into hw bytecode */ +	if (!vp->translated) { +		nv30_vertprog_translate(nv30, vp); +		if (!vp->translated) +			return FALSE; +	} + +	/* Allocate hw vtxprog exec slots */ +	if (!vp->exec) { +		struct nouveau_resource *heap = nv30->screen->vp_exec_heap; +		struct nouveau_stateobj *so; +		uint vplen = vp->nr_insns; + +		if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { +			while (heap->next && heap->size < vplen) { +				struct nv30_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->exec); +			} + +			if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) +				assert(0); +		} + +		so = so_new(2, 0); +		so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); +		so_data  (so, vp->exec->start); +		so_ref(so, &vp->so); + +		upload_code = TRUE; +	} + +	/* Allocate hw vtxprog const slots */ +	if (vp->nr_consts && !vp->data) { +		struct nouveau_resource *heap = nv30->screen->vp_data_heap; + +		if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { +			while (heap->next && heap->size < vp->nr_consts) { +				struct nv30_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->data); +			} + +			if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) +				assert(0); +		} + +		/*XXX: handle this some day */ +		assert(vp->data->start >= vp->data_start_min); + +		upload_data = TRUE; +		if (vp->data_start != vp->data->start) +			upload_code = TRUE; +	} + +	/* If exec or data segments moved we need to patch the program to +	 * fixup offsets and register IDs. 
+	 */ +	if (vp->exec_start != vp->exec->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->has_branch_offset) { +				assert(0); +			} +		} + +		vp->exec_start = vp->exec->start; +	} + +	if (vp->nr_consts && vp->data_start != vp->data->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->const_index >= 0) { +				vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; +				vpi->data[1] |= +					(vpi->const_index + vp->data->start) << +					NV30_VP_INST_CONST_SRC_SHIFT; + +			} +		} + +		vp->data_start = vp->data->start; +	} + +	/* Update + Upload constant values */ +	if (vp->nr_consts) { +		float *map = NULL; + +		if (constbuf) { +			map = ws->buffer_map(ws, constbuf, +					     PIPE_BUFFER_USAGE_CPU_READ); +		} + +		for (i = 0; i < vp->nr_consts; i++) { +			struct nv30_vertex_program_data *vpd = &vp->consts[i]; + +			if (vpd->index >= 0) { +				if (!upload_data && +				    !memcmp(vpd->value, &map[vpd->index * 4], +					    4 * sizeof(float))) +					continue; +				memcpy(vpd->value, &map[vpd->index * 4], +				       4 * sizeof(float)); +			} + +			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); +			OUT_RING  (i + vp->data->start); +			OUT_RINGp ((uint32_t *)vpd->value, 4); +		} + +		if (constbuf) { +			ws->buffer_unmap(ws, constbuf); +		} +	} + +	/* Upload vtxprog */ +	if (upload_code) { +#if 0 +		for (i = 0; i < vp->nr_insns; i++) { +			NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", +				i, vp->insns[i].data[0], vp->insns[i].data[1], +				vp->insns[i].data[2], vp->insns[i].data[3]); +		} +#endif +		BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); +		OUT_RING  (vp->exec->start); +		for (i = 0; i < vp->nr_insns; i++) { +			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); +			OUT_RINGp (vp->insns[i].data, 4); +		} +	} + +	if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) { +		so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]); +		return TRUE; +	} + +	return FALSE; +} + +void +nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) +{ +	struct nouveau_winsys *nvws = nv30->screen->nvws; + +	vp->translated = FALSE; + +	if (vp->nr_insns) { +		FREE(vp->insns); +		vp->insns = NULL; +		vp->nr_insns = 0; +	} + +	if (vp->nr_consts) { +		FREE(vp->consts); +		vp->consts = NULL; +		vp->nr_consts = 0; +	} + +	nvws->res_free(&vp->exec); +	vp->exec_start = 0; +	nvws->res_free(&vp->data); +	vp->data_start = 0; +	vp->data_start_min = 0; + +	vp->ir = vp->or = 0; +	so_ref(NULL, &vp->so); +} + +struct nv30_state_entry nv30_state_vertprog = { +	.validate = nv30_vertprog_validate, +	.dirty = { +		.pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, +		.hw = NV30_STATE_VERTPROG, +	} +}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile new file mode 100644 index 0000000000..9c8eadf7e4 --- /dev/null +++ b/src/gallium/drivers/nv40/Makefile @@ -0,0 +1,37 @@ +TOP = ../../../.. 
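+# Per-driver build glue: the nv40 sources listed below are presumably built
+# into the nv40 driver library through the shared ../../Makefile.template
+# included at the end of this file, mirroring the other Gallium driver
+# Makefiles.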
+include $(TOP)/configs/current + +LIBNAME = nv40 + +DRIVER_SOURCES = \ +	nv40_clear.c \ +	nv40_context.c \ +	nv40_draw.c \ +	nv40_fragprog.c \ +	nv40_fragtex.c \ +	nv40_miptree.c \ +	nv40_query.c \ +	nv40_screen.c \ +	nv40_state.c \ +	nv40_state_blend.c \ +	nv40_state_emit.c \ +	nv40_state_fb.c \ +	nv40_state_rasterizer.c \ +	nv40_state_scissor.c \ +	nv40_state_stipple.c \ +	nv40_state_viewport.c \ +	nv40_state_zsa.c \ +	nv40_surface.c \ +	nv40_vbo.c \ +	nv40_vertprog.c + +C_SOURCES = \ +	$(COMMON_SOURCES) \ +	$(DRIVER_SOURCES) + +ASM_SOURCES =  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c new file mode 100644 index 0000000000..59efd620e3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" + +void +nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +	pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +	ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c new file mode 100644 index 0000000000..5d325f5067 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +static void +nv40_flush(struct pipe_context *pipe, unsigned flags, +	   struct pipe_fence_handle **fence) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	 +	if (flags & PIPE_FLUSH_TEXTURE_CACHE) { +		BEGIN_RING(curie, 0x1fd8, 1); +		OUT_RING  (2); +		BEGIN_RING(curie, 0x1fd8, 1); +		OUT_RING  (1); +	} + +	FIRE_RING(fence); +} + +static void +nv40_destroy(struct pipe_context *pipe) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	if (nv40->draw) +		draw_destroy(nv40->draw); +	FREE(nv40); +} + +struct pipe_context * +nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv40_context *nv40; +	struct nouveau_winsys *nvws = screen->nvws; + +	nv40 = CALLOC(1, sizeof(struct nv40_context)); +	if (!nv40) +		return NULL; +	nv40->screen = screen; +	nv40->pctx_id = pctx_id; + +	nv40->nvws = nvws; + +	nv40->pipe.winsys = ws; +	nv40->pipe.screen = pscreen; +	nv40->pipe.destroy = nv40_destroy; +	nv40->pipe.draw_arrays = nv40_draw_arrays; +	nv40->pipe.draw_elements = nv40_draw_elements; +	nv40->pipe.clear = nv40_clear; +	nv40->pipe.flush = nv40_flush; + +	nv40_init_query_functions(nv40); +	nv40_init_surface_functions(nv40); +	nv40_init_state_functions(nv40); + +	/* Create, configure, and install fallback swtnl path */ +	nv40->draw = draw_create(); +	draw_wide_point_threshold(nv40->draw, 9999999.0); +	draw_wide_line_threshold(nv40->draw, 9999999.0); +	draw_enable_line_stipple(nv40->draw, FALSE); +	draw_enable_point_sprites(nv40->draw, FALSE); +	draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); + +	return &nv40->pipe; +} +	 diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h new file mode 100644 index 0000000000..adcfbdd85a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -0,0 +1,233 @@ +#ifndef __NV40_CONTEXT_H__ +#define __NV40_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" 
+#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \ +	struct nv40_screen *ctx = nv40->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv40_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ +	fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ +	fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv40_state_index { +	NV40_STATE_FB = 0, +	NV40_STATE_VIEWPORT = 1, +	NV40_STATE_BLEND = 2, +	NV40_STATE_RAST = 3, +	NV40_STATE_ZSA = 4, +	NV40_STATE_BCOL = 5, +	NV40_STATE_CLIP = 6, +	NV40_STATE_SCISSOR = 7, +	NV40_STATE_STIPPLE = 8, +	NV40_STATE_FRAGPROG = 9, +	NV40_STATE_VERTPROG = 10, +	NV40_STATE_FRAGTEX0 = 11, +	NV40_STATE_FRAGTEX1 = 12, +	NV40_STATE_FRAGTEX2 = 13, +	NV40_STATE_FRAGTEX3 = 14, +	NV40_STATE_FRAGTEX4 = 15, +	NV40_STATE_FRAGTEX5 = 16, +	NV40_STATE_FRAGTEX6 = 17, +	NV40_STATE_FRAGTEX7 = 18, +	NV40_STATE_FRAGTEX8 = 19, +	NV40_STATE_FRAGTEX9 = 20, +	NV40_STATE_FRAGTEX10 = 21, +	NV40_STATE_FRAGTEX11 = 22, +	NV40_STATE_FRAGTEX12 = 23, +	NV40_STATE_FRAGTEX13 = 24, +	NV40_STATE_FRAGTEX14 = 25, +	NV40_STATE_FRAGTEX15 = 26, +	NV40_STATE_VERTTEX0 = 27, +	NV40_STATE_VERTTEX1 = 28, +	NV40_STATE_VERTTEX2 = 29, +	NV40_STATE_VERTTEX3 = 30, +	NV40_STATE_VTXBUF = 31, +	NV40_STATE_VTXFMT = 32, +	NV40_STATE_VTXATTR = 33, +	NV40_STATE_MAX = 34 +}; + +#include "nv40_screen.h" + +#define NV40_NEW_BLEND		(1 <<  0) +#define NV40_NEW_RAST		(1 <<  1) +#define NV40_NEW_ZSA		(1 <<  2) +#define NV40_NEW_SAMPLER	(1 <<  3) +#define NV40_NEW_FB		(1 <<  4) +#define NV40_NEW_STIPPLE	(1 <<  5) +#define NV40_NEW_SCISSOR	(1 <<  6) +#define NV40_NEW_VIEWPORT	(1 <<  7) +#define NV40_NEW_BCOL		(1 <<  8) +#define NV40_NEW_VERTPROG	(1 <<  9) +#define NV40_NEW_FRAGPROG	(1 << 10) +#define NV40_NEW_ARRAYS		(1 << 11) +#define NV40_NEW_UCP		(1 << 12) + +struct nv40_rasterizer_state { +	struct pipe_rasterizer_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv40_zsa_state { +	struct pipe_depth_stencil_alpha_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv40_blend_state { +	struct pipe_blend_state pipe; +	struct nouveau_stateobj *so; +}; + + +struct nv40_state { +	unsigned scissor_enabled; +	unsigned stipple_enabled; +	unsigned viewport_bypass; +	unsigned fp_samplers; + +	uint64_t dirty; +	struct nouveau_stateobj *hw[NV40_STATE_MAX]; +}; + +struct nv40_context { +	struct pipe_context pipe; + +	struct nouveau_winsys *nvws; +	struct nv40_screen *screen; +	unsigned pctx_id; + +	struct draw_context *draw; + +	/* HW state derived from pipe states */ +	struct nv40_state state; +	struct { +		struct nv40_vertex_program *vertprog; + +		unsigned nr_attribs; +		unsigned hw[PIPE_MAX_SHADER_INPUTS]; +		unsigned draw[PIPE_MAX_SHADER_INPUTS]; +		unsigned emit[PIPE_MAX_SHADER_INPUTS]; +	} swtnl; + +	enum { +		HW, SWTNL, SWRAST +	} render_mode; +	unsigned fallback_swtnl; +	unsigned fallback_swrast; + +	/* Context state */ +	unsigned dirty, draw_dirty; +	struct pipe_scissor_state scissor; +	unsigned stipple[32]; +	struct pipe_clip_state clip; +	struct nv40_vertex_program *vertprog; +	struct nv40_fragment_program *fragprog; +	struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; +	unsigned constbuf_nr[PIPE_SHADER_TYPES]; +	struct nv40_rasterizer_state *rasterizer; +	struct nv40_zsa_state *zsa; +	
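+	/* The bound CSO state here (rasterizer, zsa, blend) pairs the pipe
+	 * state with a prebuilt nouveau_stateobj, presumably so validation
+	 * only has to re-reference the canned hardware commands; which
+	 * entries need revalidating is tracked by the NV40_NEW_* bits in
+	 * 'dirty' above.
+	 */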
struct nv40_blend_state *blend; +	struct pipe_blend_color blend_colour; +	struct pipe_viewport_state viewport; +	struct pipe_framebuffer_state framebuffer; +	struct pipe_buffer *idxbuf; +	unsigned idxbuf_format; +	struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; +	struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; +	unsigned nr_samplers; +	unsigned nr_textures; +	unsigned dirty_samplers; +	struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; +	unsigned vtxbuf_nr; +	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +	unsigned vtxelt_nr; +	const unsigned *edgeflags; +}; + +static INLINE struct nv40_context * +nv40_context(struct pipe_context *pipe) +{ +	return (struct nv40_context *)pipe; +} + +struct nv40_state_entry { +	boolean (*validate)(struct nv40_context *nv40); +	struct { +		unsigned pipe; +		unsigned hw; +	} dirty; +}; + +extern void nv40_init_state_functions(struct nv40_context *nv40); +extern void nv40_init_surface_functions(struct nv40_context *nv40); +extern void nv40_init_query_functions(struct nv40_context *nv40); + +extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv40_draw.c */ +extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); +extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, +					struct pipe_buffer *idxbuf, +					unsigned ib_size, unsigned mode, +					unsigned start, unsigned count); + +/* nv40_vertprog.c */ +extern void nv40_vertprog_destroy(struct nv40_context *, +				  struct nv40_vertex_program *); + +/* nv40_fragprog.c */ +extern void nv40_fragprog_destroy(struct nv40_context *, +				  struct nv40_fragment_program *); + +/* nv40_fragtex.c */ +extern void nv40_fragtex_bind(struct nv40_context *); + +/* nv40_state.c and friends */ +extern boolean nv40_state_validate(struct nv40_context *nv40); +extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); +extern void nv40_state_emit(struct nv40_context *nv40); +extern struct nv40_state_entry nv40_state_rasterizer; +extern struct nv40_state_entry nv40_state_scissor; +extern struct nv40_state_entry nv40_state_stipple; +extern struct nv40_state_entry nv40_state_fragprog; +extern struct nv40_state_entry nv40_state_vertprog; +extern struct nv40_state_entry nv40_state_blend; +extern struct nv40_state_entry nv40_state_blend_colour; +extern struct nv40_state_entry nv40_state_zsa; +extern struct nv40_state_entry nv40_state_viewport; +extern struct nv40_state_entry nv40_state_framebuffer; +extern struct nv40_state_entry nv40_state_fragtex; +extern struct nv40_state_entry nv40_state_vbo; +extern struct nv40_state_entry nv40_state_vtxfmt; + +/* nv40_vbo.c */ +extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, +				unsigned start, unsigned count); +extern boolean nv40_draw_elements(struct pipe_context *pipe, +				  struct pipe_buffer *indexBuffer, +				  unsigned indexSize, +				  unsigned mode, unsigned start, +				  unsigned count); + +/* nv40_clear.c */ +extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		       unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c new file mode 100644 index 0000000000..c83ff91d7e --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -0,0 +1,349 @@ +#include "pipe/p_shader_tokens.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nv40_context.h" +#define NV40_SHADER_NO_FUCKEDNESS +#include 
"nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all.  Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ + +struct nv40_render_stage { +	struct draw_stage stage; +	struct nv40_context *nv40; +	unsigned prim; +}; + +static INLINE struct nv40_render_stage * +nv40_render_stage(struct draw_stage *stage) +{ +	return (struct nv40_render_stage *)stage; +} + +static INLINE void +nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) +{ +	unsigned i; + +	for (i = 0; i < nv40->swtnl.nr_attribs; i++) { +		unsigned idx = nv40->swtnl.draw[i]; +		unsigned hw = nv40->swtnl.hw[i]; + +		switch (nv40->swtnl.emit[i]) { +		case EMIT_OMIT: +			break; +		case EMIT_1F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); +			OUT_RING  (fui(v->data[idx][0])); +			break; +		case EMIT_2F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); +			OUT_RING  (fui(v->data[idx][0])); +			OUT_RING  (fui(v->data[idx][1])); +			break; +		case EMIT_3F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); +			OUT_RING  (fui(v->data[idx][0])); +			OUT_RING  (fui(v->data[idx][1])); +			OUT_RING  (fui(v->data[idx][2])); +			break; +		case EMIT_4F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); +			OUT_RING  (fui(v->data[idx][0])); +			OUT_RING  (fui(v->data[idx][1])); +			OUT_RING  (fui(v->data[idx][2])); +			OUT_RING  (fui(v->data[idx][3])); +			break; +		case EMIT_4UB: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); +			OUT_RING  (pack_ub4(float_to_ubyte(v->data[idx][0]), +					    float_to_ubyte(v->data[idx][1]), +					    float_to_ubyte(v->data[idx][2]), +					    float_to_ubyte(v->data[idx][3]))); +			break; +		default: +			assert(0); +			break; +		} +	} +} + +static INLINE void +nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, +	       unsigned mode, unsigned count) +{ +	struct nv40_render_stage *rs = nv40_render_stage(stage); +	struct nv40_context *nv40 = rs->nv40; +	struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf; +	unsigned i; + +	/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ +	if (pb->remaining < ((count * 20) + 6)) { +		if (rs->prim != NV40TCL_BEGIN_END_STOP) { +			NOUVEAU_ERR("AIII, missed flush\n"); +			assert(0); +		} +		FIRE_RING(NULL); +		nv40_state_emit(nv40); +	} + +	/* Switch primitive modes if necessary */ +	if (rs->prim != mode) { +		if (rs->prim != NV40TCL_BEGIN_END_STOP) { +			BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +			OUT_RING  (NV40TCL_BEGIN_END_STOP);	 +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (mode); +		rs->prim = mode; +	} + +	/* Emit vertex data */ +	for (i = 0; i < count; i++) +		nv40_render_vertex(nv40, prim->v[i]); + +	/* If it's likely we'll need to empty the push buffer soon, finish +	 * off the primitive now. 
+	 */ +	if (pb->remaining < ((count * 20) + 6)) { +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (NV40TCL_BEGIN_END_STOP); +		rs->prim = NV40TCL_BEGIN_END_STOP; +	} +} + +static void +nv40_render_point(struct draw_stage *draw, struct prim_header *prim) +{ +	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); +} + +static void +nv40_render_line(struct draw_stage *draw, struct prim_header *prim) +{ +	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); +} + +static void +nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ +	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); +} + +static void +nv40_render_flush(struct draw_stage *draw, unsigned flags) +{ +	struct nv40_render_stage *rs = nv40_render_stage(draw); +	struct nv40_context *nv40 = rs->nv40; + +	if (rs->prim != NV40TCL_BEGIN_END_STOP) { +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (NV40TCL_BEGIN_END_STOP); +		rs->prim = NV40TCL_BEGIN_END_STOP; +	} +} + +static void +nv40_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nv40_render_destroy(struct draw_stage *draw) +{ +	FREE(draw); +} + +static INLINE void +emit_mov(struct nv40_vertex_program *vp, +	 unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ +	struct nv40_vertex_program_exec *inst; + +	vp->insns = realloc(vp->insns, +			    sizeof(struct nv40_vertex_program_exec) * +			    ++vp->nr_insns); +	inst = &vp->insns[vp->nr_insns - 1]; + +	inst->data[0] = 0x401f9c6c; +	inst->data[1] = 0x0040000d | (src << 8); +	inst->data[2] = 0x8106c083; +	inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); +	inst->const_index = -1; +	inst->has_branch_offset = FALSE; + +	vp->ir |= (1 << src); +	if (vor != ~0) +		vp->or |= (1 << vor); +} + +static struct nv40_vertex_program * +create_drawvp(struct nv40_context *nv40) +{ +	struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); +	unsigned i; + +	emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); +	for (i = 0; i < 8; i++) +		emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); + +	vp->insns[vp->nr_insns - 1].data[3] |= 1; +	vp->translated = TRUE; +	return vp; +} + +struct draw_stage * +nv40_draw_render_stage(struct nv40_context *nv40) +{ +	struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); + +	if (!nv40->swtnl.vertprog) +		nv40->swtnl.vertprog = create_drawvp(nv40); + +	render->nv40 = nv40; +	render->stage.draw = nv40->draw; +	render->stage.point = nv40_render_point; +	render->stage.line = nv40_render_line; +	render->stage.tri = nv40_render_tri; +	render->stage.flush = nv40_render_flush; +	render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; +	render->stage.destroy = nv40_render_destroy; + +	return &render->stage; +} + +boolean +nv40_draw_elements_swtnl(struct pipe_context *pipe, +			 struct pipe_buffer *idxbuf, unsigned idxbuf_size, +			 unsigned mode, unsigned start, unsigned count) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct pipe_winsys *ws = pipe->winsys; +	unsigned i; +	void *map; + +	if (!nv40_state_validate_swtnl(nv40)) +		return FALSE; +	nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); +	nv40_state_emit(nv40); + +	for (i = 0; i < nv40->vtxbuf_nr; i++) { +		map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, +                
                      PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_vertex_buffer(nv40->draw, i, map); +	} + +	if (idxbuf) { +		map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); +	} else { +		draw_set_mapped_element_buffer(nv40->draw, 0, NULL); +	} + +	if (nv40->constbuf[PIPE_SHADER_VERTEX]) { +		const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; + +		map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], +				     PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_constant_buffer(nv40->draw, map, nr); +	} + +	draw_arrays(nv40->draw, mode, start, count); + +	for (i = 0; i < nv40->vtxbuf_nr; i++) +		ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); + +	if (idxbuf) +		ws->buffer_unmap(ws, idxbuf); + +	if (nv40->constbuf[PIPE_SHADER_VERTEX]) +		ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + +	draw_flush(nv40->draw); +	pipe->flush(pipe, 0, NULL); + +	return TRUE; +} + +static INLINE void +emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, +	    unsigned semantic, unsigned index) +{ +	unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); +	unsigned a = nv40->swtnl.nr_attribs++; + +	nv40->swtnl.hw[a] = hw; +	nv40->swtnl.emit[a] = emit; +	nv40->swtnl.draw[a] = draw_out; +} + +static boolean +nv40_state_vtxfmt_validate(struct nv40_context *nv40) +{ +	struct nv40_fragment_program *fp = nv40->fragprog; +	unsigned colour = 0, texcoords = 0, fog = 0, i; + +	/* Determine needed fragprog inputs */ +	for (i = 0; i < fp->info.num_inputs; i++) { +		switch (fp->info.input_semantic_name[i]) { +		case TGSI_SEMANTIC_POSITION: +			break; +		case TGSI_SEMANTIC_COLOR: +			colour |= (1 << fp->info.input_semantic_index[i]); +			break; +		case TGSI_SEMANTIC_GENERIC: +			texcoords |= (1 << fp->info.input_semantic_index[i]); +			break; +		case TGSI_SEMANTIC_FOG: +			fog = 1; +			break; +		default: +			assert(0); +		} +	} + +	nv40->swtnl.nr_attribs = 0; + +	/* Map draw vtxprog output to hw attribute IDs */ +	for (i = 0; i < 2; i++) { +		if (!(colour & (1 << i))) +			continue; +		emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); +	} + +	for (i = 0; i < 8; i++) { +		if (!(texcoords & (1 << i))) +			continue; +		emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); +	} + +	if (fog) { +		emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); +	} + +	emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + +	return FALSE; +} + +struct nv40_state_entry nv40_state_vtxfmt = { +	.validate = nv40_state_vtxfmt_validate, +	.dirty = { +		.pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, +		.hw = 0 +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c new file mode 100644 index 0000000000..91dcbebda0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -0,0 +1,991 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV40_FP_OP_COND_TR +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) +#define scale(s,v) 
nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv40_fpc { +	struct nv40_fragment_program *fp; + +	uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + +	unsigned r_temps; +	unsigned r_temps_discard; +	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; +	struct nv40_sreg *r_temp; + +	int num_regs; + +	unsigned inst_offset; +	unsigned have_const; + +	struct { +		int pipe; +		float vals[4]; +	} consts[MAX_CONSTS]; +	int nr_consts; + +	struct nv40_sreg imm[MAX_IMM]; +	unsigned nr_imm; +}; + +static INLINE struct nv40_sreg +temp(struct nv40_fpc *fpc) +{ +	int idx = ffs(~fpc->r_temps) - 1; + +	if (idx < 0) { +		NOUVEAU_ERR("out of temps!!\n"); +		assert(0); +		return nv40_sr(NV40SR_TEMP, 0); +	} + +	fpc->r_temps |= (1 << idx); +	fpc->r_temps_discard |= (1 << idx); +	return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_fpc *fpc) +{ +	fpc->r_temps &= ~fpc->r_temps_discard; +	fpc->r_temps_discard = 0; +} + +static INLINE struct nv40_sreg +constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +{ +	int idx; + +	if (fpc->nr_consts == MAX_CONSTS) +		assert(0); +	idx = fpc->nr_consts++; + +	fpc->consts[idx].pipe = pipe; +	if (pipe == -1) +		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); +	return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ +	nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ +			(d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ +	nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ +		    (d), (m), (s0), none, none) + +static void +grow_insns(struct nv40_fpc *fpc, int size) +{ +	struct nv40_fragment_program *fp = fpc->fp; + +	fp->insn_len += size; +	fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +{ +	struct nv40_fragment_program *fp = fpc->fp; +	uint32_t *hw = &fp->insn[fpc->inst_offset]; +	uint32_t sr = 0; + +	switch (src.type) { +	case NV40SR_INPUT: +		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); +		hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); +		break; +	case NV40SR_OUTPUT: +		sr |= NV40_FP_REG_SRC_HALF; +		/* fall-through */ +	case NV40SR_TEMP: +		sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); +		sr |= (src.index << NV40_FP_REG_SRC_SHIFT); +		break; +	case NV40SR_CONST: +		if (!fpc->have_const) { +			grow_insns(fpc, 4); +			fpc->have_const = 1; +		} + +		hw = &fp->insn[fpc->inst_offset]; +		if (fpc->consts[src.index].pipe >= 0) { +			struct nv40_fragment_program_data *fpd; + +			fp->consts = realloc(fp->consts, ++fp->nr_consts * +					     sizeof(*fpd)); +			fpd = &fp->consts[fp->nr_consts - 1]; +			fpd->offset = fpc->inst_offset + 4; +			fpd->index = fpc->consts[src.index].pipe; +			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); +		} else { +			memcpy(&fp->insn[fpc->inst_offset + 4], +				fpc->consts[src.index].vals, +				sizeof(uint32_t) * 4); +		} + +		sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);	 +		break; +	case NV40SR_NONE: +		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); +		break; +	default: +		assert(0); +	} + +	if (src.negate) +		sr |= NV40_FP_REG_NEGATE; + +	if (src.abs) +		hw[1] |= (1 << (29 + pos)); + +	sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | +	       (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | +	       (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | +	       (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + +	hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv40_fpc *fpc, struct 
nv40_sreg dst) +{ +	struct nv40_fragment_program *fp = fpc->fp; +	uint32_t *hw = &fp->insn[fpc->inst_offset]; + +	switch (dst.type) { +	case NV40SR_TEMP: +		if (fpc->num_regs < (dst.index + 1)) +			fpc->num_regs = dst.index + 1; +		break; +	case NV40SR_OUTPUT: +		if (dst.index == 1) { +			fp->fp_control |= 0xe; +		} else { +			hw[0] |= NV40_FP_OP_OUT_REG_HALF; +		} +		break; +	case NV40SR_NONE: +		hw[0] |= (1 << 30); +		break; +	default: +		assert(0); +	} + +	hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +} + +static void +nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, +	      struct nv40_sreg dst, int mask, +	      struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ +	struct nv40_fragment_program *fp = fpc->fp; +	uint32_t *hw; + +	fpc->inst_offset = fp->insn_len; +	fpc->have_const = 0; +	grow_insns(fpc, 4); +	hw = &fp->insn[fpc->inst_offset]; +	memset(hw, 0, sizeof(uint32_t) * 4); + +	if (op == NV40_FP_OP_OPCODE_KIL) +		fp->fp_control |= NV40TCL_FP_CONTROL_KIL; +	hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); +	hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); +	hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + +	if (sat) +		hw[0] |= NV40_FP_OP_OUT_SAT; + +	if (dst.cc_update) +		hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; +	hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); +	hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | +		  (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | +		  (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | +		  (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + +	emit_dst(fpc, dst); +	emit_src(fpc, 0, s0); +	emit_src(fpc, 1, s1); +	emit_src(fpc, 2, s2); +} + +static void +nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, +	    struct nv40_sreg dst, int mask, +	    struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ +	struct nv40_fragment_program *fp = fpc->fp; + +	nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + +	fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); +	fp->samplers |= (1 << unit); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ +	struct nv40_sreg src; + +	switch (fsrc->SrcRegister.File) { +	case TGSI_FILE_INPUT: +		src = nv40_sr(NV40SR_INPUT, +			      fpc->attrib_map[fsrc->SrcRegister.Index]); +		break; +	case TGSI_FILE_CONSTANT: +		src = constant(fpc, fsrc->SrcRegister.Index, NULL); +		break; +	case TGSI_FILE_IMMEDIATE: +		assert(fsrc->SrcRegister.Index < fpc->nr_imm); +		src = fpc->imm[fsrc->SrcRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		src = fpc->r_temp[fsrc->SrcRegister.Index]; +		break; +	/* NV40 fragprog result regs are just temps, so this is simple */ +	case TGSI_FILE_OUTPUT: +		src = fpc->r_result[fsrc->SrcRegister.Index]; +		break; +	default: +		NOUVEAU_ERR("bad src file\n"); +		break; +	} + +	src.abs = fsrc->SrcRegisterExtMod.Absolute; +	src.negate = fsrc->SrcRegister.Negate; +	src.swz[0] = fsrc->SrcRegister.SwizzleX; +	src.swz[1] = fsrc->SrcRegister.SwizzleY; +	src.swz[2] = fsrc->SrcRegister.SwizzleZ; +	src.swz[3] = fsrc->SrcRegister.SwizzleW; +	return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { +	switch (fdst->DstRegister.File) { +	case TGSI_FILE_OUTPUT: +		return fpc->r_result[fdst->DstRegister.Index]; +	case TGSI_FILE_TEMPORARY: +		return fpc->r_temp[fdst->DstRegister.Index]; +	case TGSI_FILE_NULL: +		return nv40_sr(NV40SR_NONE, 0); +	default: +		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); +		return 
nv40_sr(NV40SR_NONE, 0); +	} +} + +static INLINE int +tgsi_mask(uint tgsi) +{ +	int mask = 0; + +	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; +	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; +	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; +	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; +	return mask; +} + +static boolean +src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, +	       struct nv40_sreg *src) +{ +	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); +	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; +	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, +			fsrc->SrcRegisterExtSwz.NegateY, +			fsrc->SrcRegisterExtSwz.NegateZ, +			fsrc->SrcRegisterExtSwz.NegateW }; +	uint c; + +	for (c = 0; c < 4; c++) { +		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { +		case TGSI_EXTSWIZZLE_X: +		case TGSI_EXTSWIZZLE_Y: +		case TGSI_EXTSWIZZLE_Z: +		case TGSI_EXTSWIZZLE_W: +			mask |= (1 << c); +			break; +		case TGSI_EXTSWIZZLE_ZERO: +			zero_mask |= (1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		case TGSI_EXTSWIZZLE_ONE: +			one_mask |= (1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		default: +			assert(0); +		} + +		if (!tgsi.negate && neg[c]) +			neg_mask |= (1 << c); +	} + +	if (mask == MASK_ALL && !neg_mask) +		return TRUE; + +	*src = temp(fpc); + +	if (mask) +		arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + +	if (zero_mask) +		arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + +	if (one_mask) +		arith(fpc, 0, STR, *src, one_mask, *src, none, none); + +	if (neg_mask) { +		struct nv40_sreg one = temp(fpc); +		arith(fpc, 0, STR, one, neg_mask, one, none, none); +		arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); +	} + +	return FALSE; +} + +static boolean +nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, +				const struct tgsi_full_instruction *finst) +{ +	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nv40_sreg src[3], dst, tmp; +	int mask, sat, unit; +	int ai = -1, ci = -1, ii = -1; +	int i; + +	if (finst->Instruction.Opcode == TGSI_OPCODE_END) +		return TRUE; + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { +			src[i] = tgsi_src(fpc, fsrc); +		} +	} + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_TEMPORARY: +			if (!src_native_swz(fpc, fsrc, &src[i])) +				continue; +			break; +		default: +			break; +		} + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			if (ai == -1 || ai == fsrc->SrcRegister.Index) { +				ai = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(fpc, fsrc); +			} else { +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_CONSTANT: +			if ((ci == -1 && ii == -1) || +			    ci == fsrc->SrcRegister.Index) { +				ci = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(fpc, fsrc); +			} else { +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_IMMEDIATE: +			if ((ci == -1 && ii == -1) || +			    ii == fsrc->SrcRegister.Index) { +				ii = fsrc->SrcRegister.Index; +				src[i] = 
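+				/* As with inputs and constants above, only one
+				 * distinct immediate appears to be readable per
+				 * instruction (the value ends up in the extra
+				 * const words emit_src() appends), so any
+				 * further immediates are MOVed into temps.
+				 */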
tgsi_src(fpc, fsrc); +			} else { +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_TEMPORARY: +			/* handled above */ +			break; +		case TGSI_FILE_SAMPLER: +			unit = fsrc->SrcRegister.Index; +			break; +		case TGSI_FILE_OUTPUT: +			break; +		default: +			NOUVEAU_ERR("bad src file\n"); +			return FALSE; +		} +	} + +	dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]); +	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); +	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + +	switch (finst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); +		break; +	case TGSI_OPCODE_ADD: +		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_CMP: +		tmp = temp(fpc); +		arith(fpc, sat, MOV, dst, mask, src[2], none, none); +		tmp.cc_update = 1; +		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); +		dst.cc_test = NV40_VP_INST_COND_LT; +		arith(fpc, sat, MOV, dst, mask, src[1], none, none); +		break; +	case TGSI_OPCODE_COS: +		arith(fpc, sat, COS, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_DDX: +		if (mask & (MASK_Z | MASK_W)) { +			tmp = temp(fpc); +			arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, +			      swz(src[0], Z, W, Z, W), none, none); +			arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, +			      swz(tmp, X, Y, X, Y), none, none); +			arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], +			      none, none); +			arith(fpc, 0, MOV, dst, mask, tmp, none, none); +		} else { +			arith(fpc, sat, DDX, dst, mask, src[0], none, none); +		} +		break; +	case TGSI_OPCODE_DDY: +		if (mask & (MASK_Z | MASK_W)) { +			tmp = temp(fpc); +			arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, +			      swz(src[0], Z, W, Z, W), none, none); +			arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, +			      swz(tmp, X, Y, X, Y), none, none); +			arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], +			      none, none); +			arith(fpc, 0, MOV, dst, mask, tmp, none, none); +		} else { +			arith(fpc, sat, DDY, dst, mask, src[0], none, none); +		} +		break; +	case TGSI_OPCODE_DP3: +		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DP4: +		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DPH: +		tmp = temp(fpc); +		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); +		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), +		      swz(src[1], W, W, W, W), none); +		break; +	case TGSI_OPCODE_DST: +		arith(fpc, sat, DST, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_EX2: +		arith(fpc, sat, EX2, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FLR: +		arith(fpc, sat, FLR, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FRC: +		arith(fpc, sat, FRC, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_KILP: +		arith(fpc, 0, KIL, none, 0, none, none, none); +		break; +	case TGSI_OPCODE_KIL: +		dst = nv40_sr(NV40SR_NONE, 0); +		dst.cc_update = 1; +		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); +		dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; +		arith(fpc, 0, KIL, dst, 0, none, none, none); +		break; +	case TGSI_OPCODE_LG2: +		arith(fpc, sat, LG2, dst, mask, src[0], none, none); +		break; +//	case TGSI_OPCODE_LIT: +	case TGSI_OPCODE_LRP: +		tmp = temp(fpc); +		arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); +		arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); +		break; +	case 
TGSI_OPCODE_MAD: +		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); +		break; +	case TGSI_OPCODE_MAX: +		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MIN: +		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MOV: +		arith(fpc, sat, MOV, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_MUL: +		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_NOISE1: +	case TGSI_OPCODE_NOISE2: +	case TGSI_OPCODE_NOISE3: +	case TGSI_OPCODE_NOISE4: +		arith(fpc, sat, SFL, dst, mask, none, none, none); +		break; +	case TGSI_OPCODE_POW: +		tmp = temp(fpc); +		arith(fpc, 0, LG2, tmp, MASK_X, +		      swz(src[0], X, X, X, X), none, none); +		arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		      swz(src[1], X, X, X, X), none); +		arith(fpc, sat, EX2, dst, mask, +		      swz(tmp, X, X, X, X), none, none); +		break; +	case TGSI_OPCODE_RCP: +		arith(fpc, sat, RCP, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_RET: +		assert(0); +		break; +	case TGSI_OPCODE_RFL: +		tmp = temp(fpc); +		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); +		arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); +		arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, +		      swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); +		arith(fpc, sat, MAD, dst, mask, +		      swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); +		break; +	case TGSI_OPCODE_RSQ: +		tmp = temp(fpc); +		arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, +		      abs(swz(src[0], X, X, X, X)), none, none); +		arith(fpc, sat, EX2, dst, mask, +		      neg(swz(tmp, X, X, X, X)), none, none); +		break; +	case TGSI_OPCODE_SCS: +		if (mask & MASK_X) { +			arith(fpc, sat, COS, dst, MASK_X, +			      swz(src[0], X, X, X, X), none, none); +		} +		if (mask & MASK_Y) { +			arith(fpc, sat, SIN, dst, MASK_Y, +			      swz(src[0], X, X, X, X), none, none); +		} +		break; +	case TGSI_OPCODE_SEQ: +		arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SFL: +		arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SGE: +		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SGT: +		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SIN: +		arith(fpc, sat, SIN, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_SLE: +		arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SLT: +		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SNE: +		arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_STR: +		arith(fpc, sat, STR, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SUB: +		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); +		break; +	case TGSI_OPCODE_TEX: +		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_TXB: +		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_TXP: +		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_XPD: +		tmp = temp(fpc); +		arith(fpc, 0, MUL, tmp, mask, +		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); +		arith(fpc, sat, MAD, dst, (mask & ~MASK_W), +		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), +		      neg(tmp)); +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); +		return FALSE; +	} + +	release_temps(fpc); +	
return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, +				const struct tgsi_full_declaration *fdec) +{ +	int hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = NV40_FP_OP_INPUT_SRC_POSITION; +		break; +	case TGSI_SEMANTIC_COLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV40_FP_OP_INPUT_SRC_COL0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV40_FP_OP_INPUT_SRC_COL1; +		} else { +			NOUVEAU_ERR("bad colour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_FOG: +		hw = NV40_FP_OP_INPUT_SRC_FOGC; +		break; +	case TGSI_SEMANTIC_GENERIC: +		if (fdec->Semantic.SemanticIndex <= 7) { +			hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. +						     SemanticIndex); +		} else { +			NOUVEAU_ERR("bad generic semantic index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad input semantic\n"); +		return FALSE; +	} + +	fpc->attrib_map[fdec->DeclarationRange.First] = hw; +	return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, +				const struct tgsi_full_declaration *fdec) +{ +	unsigned idx = fdec->DeclarationRange.First; +	unsigned hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = 1; +		break; +	case TGSI_SEMANTIC_COLOR: +		switch (fdec->Semantic.SemanticIndex) { +		case 0: hw = 0; break; +		case 1: hw = 2; break; +		case 2: hw = 3; break; +		case 3: hw = 4; break; +		default: +			NOUVEAU_ERR("bad rcol index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad output semantic\n"); +		return FALSE; +	} + +	fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); +	fpc->r_temps |= (1 << hw); +	return TRUE; +} + +static boolean +nv40_fragprog_prepare(struct nv40_fpc *fpc) +{ +	struct tgsi_parse_context p; +	int high_temp = -1, i; + +	tgsi_parse_init(&p, fpc->fp->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch(tok->Token.Type) { +		case TGSI_TOKEN_TYPE_DECLARATION: +		{ +			const struct tgsi_full_declaration *fdec; +			fdec = &p.FullToken.FullDeclaration; +			switch (fdec->Declaration.File) { +			case TGSI_FILE_INPUT: +				if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) +					goto out_err; +				break; +			case TGSI_FILE_OUTPUT: +				if (!nv40_fragprog_parse_decl_output(fpc, fdec)) +					goto out_err; +				break; +			case TGSI_FILE_TEMPORARY: +				if (fdec->DeclarationRange.Last > high_temp) { +					high_temp = +						fdec->DeclarationRange.Last; +				} +				break; +			default: +				break; +			} +		} +			break; +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			struct tgsi_full_immediate *imm; +			float vals[4]; +			 +			imm = &p.FullToken.FullImmediate; +			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +			assert(fpc->nr_imm < MAX_IMM); + +			vals[0] = imm->u.ImmediateFloat32[0].Float; +			vals[1] = imm->u.ImmediateFloat32[1].Float; +			vals[2] = imm->u.ImmediateFloat32[2].Float; +			vals[3] = imm->u.ImmediateFloat32[3].Float; +			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); +		} +			break; +		default: +			break; +		} +	} +	tgsi_parse_free(&p); + +	if (++high_temp) { +		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); +		for (i = 0; i < high_temp; i++) +			fpc->r_temp[i] = temp(fpc); +		fpc->r_temps_discard = 0; +	} + +	return TRUE; + +out_err: +	if (fpc->r_temp) +		FREE(fpc->r_temp); +	tgsi_parse_free(&p); +	return FALSE; +} + +static void +nv40_fragprog_translate(struct 
nv40_context *nv40, +			struct nv40_fragment_program *fp) +{ +	struct tgsi_parse_context parse; +	struct nv40_fpc *fpc = NULL; + +	fpc = CALLOC(1, sizeof(struct nv40_fpc)); +	if (!fpc) +		return; +	fpc->fp = fp; +	fpc->num_regs = 2; + +	if (!nv40_fragprog_prepare(fpc)) { +		FREE(fpc); +		return; +	} + +	tgsi_parse_init(&parse, fp->pipe.tokens); + +	while (!tgsi_parse_end_of_tokens(&parse)) { +		tgsi_parse_token(&parse); + +		switch (parse.FullToken.Token.Type) { +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; + +			finst = &parse.FullToken.FullInstruction; +			if (!nv40_fragprog_parse_instruction(fpc, finst)) +				goto out_err; +		} +			break; +		default: +			break; +		} +	} + +	fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + +	/* Terminate final instruction */ +	fp->insn[fpc->inst_offset] |= 0x00000001; + +	/* Append NOP + END instruction, may or may not be necessary. */ +	fpc->inst_offset = fp->insn_len; +	grow_insns(fpc, 4); +	fp->insn[fpc->inst_offset + 0] = 0x00000001; +	fp->insn[fpc->inst_offset + 1] = 0x00000000; +	fp->insn[fpc->inst_offset + 2] = 0x00000000; +	fp->insn[fpc->inst_offset + 3] = 0x00000000; +	 +	fp->translated = TRUE; +out_err: +	tgsi_parse_free(&parse); +	if (fpc->r_temp) +		FREE(fpc->r_temp); +	FREE(fpc); +} + +static void +nv40_fragprog_upload(struct nv40_context *nv40, +		     struct nv40_fragment_program *fp) +{ +	struct pipe_winsys *ws = nv40->pipe.winsys; +	const uint32_t le = 1; +	uint32_t *map; +	int i; + +	map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 +	for (i = 0; i < fp->insn_len; i++) { +		fflush(stdout); fflush(stderr); +		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); +		fflush(stdout); fflush(stderr); +	} +#endif + +	if ((*(const uint8_t *)&le)) { +		for (i = 0; i < fp->insn_len; i++) { +			map[i] = fp->insn[i]; +		} +	} else { +		/* Weird swapping for big-endian chips */ +		for (i = 0; i < fp->insn_len; i++) { +			map[i] = ((fp->insn[i] & 0xffff) << 16) | +				  ((fp->insn[i] >> 16) & 0xffff); +		} +	} + +	ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv40_fragprog_validate(struct nv40_context *nv40) +{ +	struct nv40_fragment_program *fp = nv40->fragprog; +	struct pipe_buffer *constbuf = +		nv40->constbuf[PIPE_SHADER_FRAGMENT]; +	struct pipe_winsys *ws = nv40->pipe.winsys; +	struct nouveau_stateobj *so; +	boolean new_consts = FALSE; +	int i; + +	if (fp->translated) +		goto update_constants; + +	nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; +	nv40_fragprog_translate(nv40, fp); +	if (!fp->translated) { +		nv40->fallback_swrast |= NV40_NEW_FRAGPROG; +		return FALSE; +	} + +	fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); +	nv40_fragprog_upload(nv40, fp); + +	so = so_new(4, 1); +	so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); +	so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, +		  NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); +	so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); +	so_data  (so, fp->fp_control); +	so_ref(so, &fp->so); + +update_constants: +	if (fp->nr_consts) { +		float *map; +		 +		map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); +		for (i = 0; i < fp->nr_consts; i++) { +			struct nv40_fragment_program_data *fpd = &fp->consts[i]; +			uint32_t *p = &fp->insn[fpd->offset]; +			uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + +			if (!memcmp(p, cb, 4 * sizeof(float))) +				continue; +			memcpy(p, cb, 4 * 
sizeof(float)); +			new_consts = TRUE; +		} +		ws->buffer_unmap(ws, constbuf); + +		if (new_consts) +			nv40_fragprog_upload(nv40, fp); +	} + +	if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { +		so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); +		return TRUE; +	} + +	return FALSE; +} + +void +nv40_fragprog_destroy(struct nv40_context *nv40, +		      struct nv40_fragment_program *fp) +{ +	if (fp->insn_len) +		FREE(fp->insn); +} + +struct nv40_state_entry nv40_state_fragprog = { +	.validate = nv40_fragprog_validate, +	.dirty = { +		.pipe = NV40_NEW_FRAGPROG, +		.hw = NV40_STATE_FRAGPROG +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c new file mode 100644 index 0000000000..0227d22620 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -0,0 +1,168 @@ +#include "nv40_context.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw)            \ +{                                                                              \ +  TRUE,                                                                        \ +  PIPE_FORMAT_##m,                                                             \ +  NV40TCL_TEX_FORMAT_FORMAT_##tf,                                              \ +  (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y |         \ +   NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w |         \ +   NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y |         \ +   NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w),         \ +  ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) |       \ +   (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw))       \ +} + +struct nv40_texture_format { +	boolean defined; +	uint	pipe; +	int     format; +	int     swizzle; +	int     sign; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { +	_(A8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(A1R5G5B5_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(A4R4G4B4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(R5G6B5_UNORM  , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0), +	_(L8_UNORM      , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0), +	_(A8_UNORM      , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X, 0, 0, 0, 0), +	_(R16_SNORM     , A16     , ZERO, ZERO,   S1,  ONE, X, X, X, Y, 1, 1, 1, 1), +	_(I8_UNORM      , L8      ,   S1,   S1,   S1,   S1, X, X, X, X, 0, 0, 0, 0), +	_(A8L8_UNORM    , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y, 0, 0, 0, 0), +	_(Z16_UNORM     , Z16     ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0), +	_(Z24S8_UNORM   , Z24     ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0), +	_(DXT1_RGB      , DXT1    ,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0), +	_(DXT1_RGBA     , DXT1    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(DXT3_RGBA     , DXT3    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(DXT5_RGBA     , DXT5    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	{}, +}; + +static struct nv40_texture_format * +nv40_fragtex_format(uint pipe_format) +{ +	struct nv40_texture_format *tf = nv40_texture_formats; + +	while (tf->defined) { +		if (tf->pipe == pipe_format) +			return tf; +		tf++; +	} + +	NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); +	return NULL; +} + + +static struct nouveau_stateobj * +nv40_fragtex_build(struct 
nv40_context *nv40, int unit) +{ +	struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; +	struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; +	struct pipe_texture *pt = &nv40mt->base; +	struct nv40_texture_format *tf; +	struct nouveau_stateobj *so; +	uint32_t txf, txs, txp; +	unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + +	tf = nv40_fragtex_format(pt->format); +	if (!tf) +		assert(0); + +	txf  = ps->fmt; +	txf |= tf->format | 0x8000; +	txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); + +	if (1) /* XXX */ +		txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + +	switch (pt->target) { +	case PIPE_TEXTURE_CUBE: +		txf |= NV40TCL_TEX_FORMAT_CUBIC; +		/* fall-through */ +	case PIPE_TEXTURE_2D: +		txf |= NV40TCL_TEX_FORMAT_DIMS_2D; +		break; +	case PIPE_TEXTURE_3D: +		txf |= NV40TCL_TEX_FORMAT_DIMS_3D; +		break; +	case PIPE_TEXTURE_1D: +		txf |= NV40TCL_TEX_FORMAT_DIMS_1D; +		break; +	default: +		NOUVEAU_ERR("Unknown target %d\n", pt->target); +		return NULL; +	} + +	if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		txp = 0; +	} else { +		txp  = nv40mt->level[0].pitch; +		txf |= NV40TCL_TEX_FORMAT_LINEAR; +	} + +	txs = tf->swizzle; + +	so = so_new(16, 2); +	so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); +	so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); +	so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, +		  NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); +	so_data  (so, ps->wrap); +	so_data  (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); +	so_data  (so, txs); +	so_data  (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); +	so_data  (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | +		       pt->height[0]); +	so_data  (so, ps->bcol); +	so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); +	so_data  (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + +	return so; +} + +static boolean +nv40_fragtex_validate(struct nv40_context *nv40) +{ +	struct nv40_fragment_program *fp = nv40->fragprog; +	struct nv40_state *state = &nv40->state; +	struct nouveau_stateobj *so; +	unsigned samplers, unit; + +	samplers = state->fp_samplers & ~fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		so = so_new(2, 0); +		so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); +		so_data  (so, 0); +		so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); +		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); +	} + +	samplers = nv40->dirty_samplers & fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		so = nv40_fragtex_build(nv40, unit); +		so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); +		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); +	} + +	nv40->state.fp_samplers = fp->samplers; +	return FALSE; +} + +struct nv40_state_entry nv40_state_fragtex = { +	.validate = nv40_fragtex_validate, +	.dirty = { +		.pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, +		.hw = 0 +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c new file mode 100644 index 0000000000..e4f8df910a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -0,0 +1,237 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv40_context.h" + +static void +nv40_miptree_layout(struct nv40_miptree *mt) +{ +	struct pipe_texture *pt = &mt->base; +	uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; +	uint 
offset = 0; +	int nr_faces, l, f; +	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | +		                           PIPE_TEXTURE_USAGE_DEPTH_STENCIL | +		                           PIPE_TEXTURE_USAGE_RENDER_TARGET | +		                           PIPE_TEXTURE_USAGE_DISPLAY_TARGET | +		                           PIPE_TEXTURE_USAGE_PRIMARY); + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		nr_faces = 6; +	} else +	if (pt->target == PIPE_TEXTURE_3D) { +		nr_faces = pt->depth[0]; +	} else { +		nr_faces = 1; +	} + +	for (l = 0; l <= pt->last_level; l++) { +		pt->width[l] = width; +		pt->height[l] = height; +		pt->depth[l] = depth; +		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); +		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + +		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) +			mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); +		else +			mt->level[l].pitch = pt->width[l] * pt->block.size; + +		mt->level[l].image_offset = +			CALLOC(nr_faces, sizeof(unsigned)); + +		width  = MAX2(1, width  >> 1); +		height = MAX2(1, height >> 1); +		depth  = MAX2(1, depth  >> 1); +	} + +	for (f = 0; f < nr_faces; f++) { +		for (l = 0; l < pt->last_level; l++) { +			mt->level[l].image_offset[f] = offset; + +			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) +				offset += align(mt->level[l].pitch * pt->height[l], 64); +			else +				offset += mt->level[l].pitch * pt->height[l]; +		} + +		mt->level[l].image_offset[f] = offset; +		offset += mt->level[l].pitch * pt->height[l]; +	} + +	mt->total_size = offset; +} + +static struct pipe_texture * +nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv40_miptree *mt; +	unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | +	                     NOUVEAU_BUFFER_USAGE_TEXTURE; + +	mt = MALLOC(sizeof(struct nv40_miptree)); +	if (!mt) +		return NULL; +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->shadow_tex = NULL; +	mt->shadow_surface = NULL; + +	/* Swizzled textures must be POT */ +	if (pt->width[0] & (pt->width[0] - 1) || +	    pt->height[0] & (pt->height[0] - 1)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | +	                     PIPE_TEXTURE_USAGE_DISPLAY_TARGET | +	                     PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else { +		switch (pt->format) { +		/* TODO: Figure out which formats can be swizzled */ +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_X8R8G8B8_UNORM: +		case PIPE_FORMAT_R16_SNORM: +		{ +			if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) +				mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +			break; +		} +		default: +			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +		} +	} + +	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) +		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + +	nv40_miptree_layout(mt); + +	mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); +	if (!mt->buffer) { +		FREE(mt); +		return NULL; +	} + +	return &mt->base; +} + +static struct pipe_texture * +nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +		     const unsigned *stride, struct pipe_buffer *pb) +{ +	struct nv40_miptree *mt; + +	/* Only supports 2D, non-mipmapped textures for the moment */ +	if 
(pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || +	    pt->depth[0] != 1) +		return NULL; + +	mt = CALLOC_STRUCT(nv40_miptree); +	if (!mt) +		return NULL; + +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->level[0].pitch = stride[0]; +	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + +	pipe_buffer_reference(pscreen, &mt->buffer, pb); +	return &mt->base; +} + +static void +nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ +	struct pipe_texture *pt = *ppt; +	struct nv40_miptree *mt = (struct nv40_miptree *)pt; +	int l; + +	*ppt = NULL; +	if (--pt->refcount) +		return; + +	pipe_buffer_reference(pscreen, &mt->buffer, NULL); +	for (l = 0; l <= pt->last_level; l++) { +		if (mt->level[l].image_offset) +			FREE(mt->level[l].image_offset); +	} + +	if (mt->shadow_tex) { +		if (mt->shadow_surface) +			pscreen->tex_surface_release(pscreen, &mt->shadow_surface); +		nv40_miptree_release(pscreen, &mt->shadow_tex); +	} + +	FREE(mt); +} + +static struct pipe_surface * +nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +			 unsigned face, unsigned level, unsigned zslice, +			 unsigned flags) +{ +	struct nv40_miptree *mt = (struct nv40_miptree *)pt; +	struct pipe_surface *ps; + +	ps = CALLOC_STRUCT(pipe_surface); +	if (!ps) +		return NULL; +	pipe_texture_reference(&ps->texture, pt); +	ps->format = pt->format; +	ps->width = pt->width[level]; +	ps->height = pt->height[level]; +	ps->block = pt->block; +	ps->nblocksx = pt->nblocksx[level]; +	ps->nblocksy = pt->nblocksy[level]; +	ps->stride = mt->level[level].pitch; +	ps->usage = flags; +	ps->status = PIPE_SURFACE_STATUS_DEFINED; +	ps->refcount = 1; +	ps->face = face; +	ps->level = level; +	ps->zslice = zslice; + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		ps->offset = mt->level[level].image_offset[face]; +	} else +	if (pt->target == PIPE_TEXTURE_3D) { +		ps->offset = mt->level[level].image_offset[zslice]; +	} else { +		ps->offset = mt->level[level].image_offset[0]; +	} + +	return ps; +} + +static void +nv40_miptree_surface_del(struct pipe_screen *pscreen, +			 struct pipe_surface **psurface) +{ +	struct pipe_surface *ps = *psurface; + +	*psurface = NULL; +	if (--ps->refcount > 0) +		return; + +	pipe_texture_reference(&ps->texture, NULL); +	FREE(ps); +} + +void +nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ +	pscreen->texture_create = nv40_miptree_create; +	pscreen->texture_blanket = nv40_miptree_blanket; +	pscreen->texture_release = nv40_miptree_release; +	pscreen->get_tex_surface = nv40_miptree_surface_new; +	pscreen->tex_surface_release = nv40_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c new file mode 100644 index 0000000000..9b9a43f49d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv40_context.h" + +struct nv40_query { +	struct nouveau_resource *object; +	unsigned type; +	boolean ready; +	uint64_t result; +}; + +static INLINE struct nv40_query * +nv40_query(struct pipe_query *pipe) +{ +	return (struct nv40_query *)pipe; +} + +static struct pipe_query * +nv40_query_create(struct pipe_context *pipe, unsigned query_type) +{ +	struct nv40_query *q; + +	q = CALLOC(1, sizeof(struct nv40_query)); +	q->type = query_type; + +	return (struct pipe_query *)q; +} + +static void +nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	
struct nv40_query *q = nv40_query(pq); + +	if (q->object) +		nv40->nvws->res_free(&q->object); +	FREE(q); +} + +static void +nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_query *q = nv40_query(pq); + +	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + +	/* Happens when end_query() is called, then another begin_query() +	 * without querying the result in-between.  For now we'll wait for +	 * the existing query to notify completion, but it could be better. +	 */ +	if (q->object) { +		uint64_t tmp; +		pipe->get_query_result(pipe, pq, 1, &tmp); +	} + +	if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) +		assert(0); +	nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); + +	BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); +	OUT_RING  (1); +	BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); +	OUT_RING  (1); + +	q->ready = FALSE; +} + +static void +nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_query *q = nv40_query(pq); + +	BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); +	OUT_RING  ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | +		   ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); +	FIRE_RING(NULL); +} + +static boolean +nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, +		  boolean wait, uint64_t *result) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_query *q = nv40_query(pq); +	struct nouveau_winsys *nvws = nv40->nvws; + +	assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + +	if (!q->ready) { +		unsigned status; + +		status = nvws->notifier_status(nv40->screen->query, +					       q->object->start); +		if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { +			if (wait == FALSE) +				return FALSE; +			nvws->notifier_wait(nv40->screen->query, q->object->start, +					    NV_NOTIFY_STATE_STATUS_COMPLETED, +					    0); +		} + +		q->result = nvws->notifier_retval(nv40->screen->query, +						  q->object->start); +		q->ready = TRUE; +		nvws->res_free(&q->object); +	} + +	*result = q->result; +	return TRUE; +} + +void +nv40_init_query_functions(struct nv40_context *nv40) +{ +	nv40->pipe.create_query = nv40_query_create; +	nv40->pipe.destroy_query = nv40_query_destroy; +	nv40->pipe.begin_query = nv40_query_begin; +	nv40->pipe.end_query = nv40_query_end; +	nv40->pipe.get_query_result = nv40_query_result; +} diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c new file mode 100644 index 0000000000..2372bc8441 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -0,0 +1,383 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static const char * +nv40_screen_get_name(struct pipe_screen *pscreen) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); +	struct nouveau_device *dev = screen->nvws->channel->device; +	static char buffer[128]; + +	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); +	return buffer; +} + +static const char * +nv40_screen_get_vendor(struct pipe_screen *pscreen) +{ +	return "nouveau"; +} + +static int +nv40_screen_get_param(struct pipe_screen *pscreen, int param) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); + +	switch (param) { +	case 
PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +		return 16; +	case PIPE_CAP_NPOT_TEXTURES: +		return 1; +	case PIPE_CAP_TWO_SIDED_STENCIL: +		return 1; +	case PIPE_CAP_GLSL: +		return 0; +	case PIPE_CAP_S3TC: +		return 1; +	case PIPE_CAP_ANISOTROPIC_FILTER: +		return 1; +	case PIPE_CAP_POINT_SPRITE: +		return 1; +	case PIPE_CAP_MAX_RENDER_TARGETS: +		return 4; +	case PIPE_CAP_OCCLUSION_QUERY: +		return 1; +	case PIPE_CAP_TEXTURE_SHADOW_MAP: +		return 1; +	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +		return 13; +	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +		return 10; +	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +		return 13; +	case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +	case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +		return 1; +	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +		return 0; /* We have 4 - but unsupported currently */ +	case NOUVEAU_CAP_HW_VTXBUF: +		return 1; +	case NOUVEAU_CAP_HW_IDXBUF: +		if (screen->curie->grclass == NV40TCL) +			return 1; +		return 0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0; +	} +} + +static float +nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_LINE_WIDTH: +	case PIPE_CAP_MAX_LINE_WIDTH_AA: +		return 10.0; +	case PIPE_CAP_MAX_POINT_WIDTH: +	case PIPE_CAP_MAX_POINT_WIDTH_AA: +		return 64.0; +	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +		return 16.0; +	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +		return 16.0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0.0; +	} +} + +static boolean +nv40_screen_surface_format_supported(struct pipe_screen *pscreen, +				     enum pipe_format format, +				     enum pipe_texture_target target, +				     unsigned tex_usage, unsigned geom_flags) +{ +	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +			return TRUE; +		default: +			break; +		} +	} else { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_A1R5G5B5_UNORM: +		case PIPE_FORMAT_A4R4G4B4_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM: +		case PIPE_FORMAT_R16_SNORM: +		case PIPE_FORMAT_L8_UNORM: +		case PIPE_FORMAT_A8_UNORM: +		case PIPE_FORMAT_I8_UNORM: +		case PIPE_FORMAT_A8L8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_DXT1_RGB: +		case PIPE_FORMAT_DXT1_RGBA: +		case PIPE_FORMAT_DXT3_RGBA: +		case PIPE_FORMAT_DXT5_RGBA: +			return TRUE; +		default: +			break; +		} +	} + +	return FALSE; +} + +static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surf) +{ +	struct nv40_miptree *mt = (struct nv40_miptree *)surf->texture; + +	return mt->buffer; +} + +static void * +nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, +		 unsigned flags ) +{ +	struct pipe_winsys	*ws = screen->winsys; +	struct pipe_surface	*surface_to_map; +	void			*map; + +	if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + +		if (!mt->shadow_tex) { +			unsigned old_tex_usage = surface->texture->tex_usage; +			surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | +			                              PIPE_TEXTURE_USAGE_DYNAMIC; +			mt->shadow_tex = screen->texture_create(screen, surface->texture); +			surface->texture->tex_usage = old_tex_usage; + +			assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR); +		} + +		mt->shadow_surface = screen->get_tex_surface +		( +			screen, 
mt->shadow_tex, +			surface->face, surface->level, surface->zslice, +			surface->usage +		); + +		surface_to_map = mt->shadow_surface; +	} +	else +		surface_to_map = surface; + +	assert(surface_to_map); +	map = ws->buffer_map(ws, nv40_surface_buffer(surface_to_map), flags); +	if (!map) +		return NULL; + +	return map + surface_to_map->offset; +} + +static void +nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ +	struct pipe_winsys	*ws = screen->winsys; +	struct pipe_surface	*surface_to_unmap; + +	/* TODO: Copy from shadow just before push buffer is flushed instead. +	         There are probably some programs that map/unmap excessively +	         before rendering. */ +	if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + +		assert(mt->shadow_tex); + +		surface_to_unmap = mt->shadow_surface; +	} +	else +		surface_to_unmap = surface; + +	assert(surface_to_unmap); + +	ws->buffer_unmap(ws, nv40_surface_buffer(surface_to_unmap)); + +	if (surface_to_unmap != surface) { +		struct nv40_screen *nvscreen = nv40_screen(screen); + +		nvscreen->eng2d->copy(nvscreen->eng2d, surface, 0, 0, +		                      surface_to_unmap, 0, 0, +		                      surface->width, surface->height); + +		screen->tex_surface_release(screen, &surface_to_unmap); +	} +} + +static void +nv40_screen_destroy(struct pipe_screen *pscreen) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); +	struct nouveau_winsys *nvws = screen->nvws; + +	nvws->res_free(&screen->vp_exec_heap); +	nvws->res_free(&screen->vp_data_heap); +	nvws->res_free(&screen->query_heap); +	nvws->notifier_free(&screen->query); +	nvws->notifier_free(&screen->sync); +	nvws->grobj_free(&screen->curie); + +	FREE(pscreen); +} + +struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ +	struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); +	struct nouveau_stateobj *so; +	unsigned curie_class; +	unsigned chipset = nvws->channel->device->chipset; +	int ret; + +	if (!screen) +		return NULL; +	screen->nvws = nvws; + +	/* 2D engine setup */ +	screen->eng2d = nv04_surface_2d_init(nvws); +	screen->eng2d->buf = nv40_surface_buffer; + +	/* 3D object */ +	switch (chipset & 0xf0) { +	case 0x40: +		if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) +			curie_class = NV40TCL; +		else +		if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) +			curie_class = NV44TCL; +		break; +	case 0x60: +		if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) +			curie_class = NV44TCL; +		break; +	default: +		break; +	} + +	if (!curie_class) { +		NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); +		return NULL; +	} + +	ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie); +	if (ret) { +		NOUVEAU_ERR("Error creating 3D object: %d\n", ret); +		return FALSE; +	} + +	/* Notifier for sync purposes */ +	ret = nvws->notifier_alloc(nvws, 1, &screen->sync); +	if (ret) { +		NOUVEAU_ERR("Error creating notifier object: %d\n", ret); +		nv40_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Query objects */ +	ret = nvws->notifier_alloc(nvws, 32, &screen->query); +	if (ret) { +		NOUVEAU_ERR("Error initialising query objects: %d\n", ret); +		nv40_screen_destroy(&screen->pipe); +		return NULL; +	} + +	ret = nvws->res_init(&screen->query_heap, 0, 32); +	if (ret) { +		NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); +		nv40_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Vtxprog 
resources */ +	if (nvws->res_init(&screen->vp_exec_heap, 0, 512) || +	    nvws->res_init(&screen->vp_data_heap, 0, 256)) { +		nv40_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Static curie initialisation */ +	so = so_new(128, 0); +	so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); +	so_data  (so, screen->sync->handle); +	so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); +	so_data  (so, nvws->channel->vram->handle); +	so_data  (so, nvws->channel->gart->handle); +	so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); +	so_data  (so, nvws->channel->vram->handle); +	so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); +	so_data  (so, nvws->channel->vram->handle); +	so_data  (so, nvws->channel->vram->handle); +	so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); +	so_data  (so, nvws->channel->vram->handle); +	so_data  (so, nvws->channel->gart->handle); +	so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); +	so_data  (so, 0); +	so_data  (so, screen->query->handle); +	so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); +	so_data  (so, nvws->channel->vram->handle); +	so_data  (so, nvws->channel->vram->handle); +	so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); +	so_data  (so, nvws->channel->vram->handle); +	so_data  (so, nvws->channel->vram->handle); + +	so_method(so, screen->curie, 0x1ea4, 3); +	so_data  (so, 0x00000010); +	so_data  (so, 0x01000100); +	so_data  (so, 0xff800006); + +	/* vtxprog output routing */ +	so_method(so, screen->curie, 0x1fc4, 1); +	so_data  (so, 0x06144321); +	so_method(so, screen->curie, 0x1fc8, 2); +	so_data  (so, 0xedcba987); +	so_data  (so, 0x00000021); +	so_method(so, screen->curie, 0x1fd0, 1); +	so_data  (so, 0x00171615); +	so_method(so, screen->curie, 0x1fd4, 1); +	so_data  (so, 0x001b1a19); + +	so_method(so, screen->curie, 0x1ef8, 1); +	so_data  (so, 0x0020ffff); +	so_method(so, screen->curie, 0x1d64, 1); +	so_data  (so, 0x00d30000); +	so_method(so, screen->curie, 0x1e94, 1); +	so_data  (so, 0x00000001); + +	so_emit(nvws, so); +	so_ref(NULL, &so); +	nvws->push_flush(nvws, 0, NULL); + +	screen->pipe.winsys = ws; +	screen->pipe.destroy = nv40_screen_destroy; + +	screen->pipe.get_name = nv40_screen_get_name; +	screen->pipe.get_vendor = nv40_screen_get_vendor; +	screen->pipe.get_param = nv40_screen_get_param; +	screen->pipe.get_paramf = nv40_screen_get_paramf; + +	screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + +	screen->pipe.surface_map = nv40_surface_map; +	screen->pipe.surface_unmap = nv40_surface_unmap; + +	nv40_screen_init_miptree_functions(&screen->pipe); +	u_simple_screen_init(&screen->pipe); + +	return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h new file mode 100644 index 0000000000..4500aa0e5c --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -0,0 +1,37 @@ +#ifndef __NV40_SCREEN_H__ +#define __NV40_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv40_screen { +	struct pipe_screen pipe; + +	struct nouveau_winsys *nvws; + +	unsigned cur_pctx; + +	/* HW graphics objects */ +	struct nv04_surface_2d *eng2d; +	struct nouveau_grobj *curie; +	struct nouveau_notifier *sync; + +	/* Query object resources */ +	struct nouveau_notifier *query; +	struct nouveau_resource *query_heap; + +	/* Vtxprog resources */ +	struct nouveau_resource *vp_exec_heap; +	struct nouveau_resource *vp_data_heap; + +	/* Current 3D state of channel */ +	struct nouveau_stateobj *state[NV40_STATE_MAX]; +}; + +static INLINE 
struct nv40_screen * +nv40_screen(struct pipe_screen *screen) +{ +	return (struct nv40_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h new file mode 100644 index 0000000000..854dccf548 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -0,0 +1,556 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30.  Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + *     In some cases it is possible to put two instructions into one opcode + *     slot.  The rules for when this is OK is not entirely clear to me yet. + * + *     There are separate writemasks and dest temp register fields for each + *     grouping of instructions.  There is however only one field with the + *     ID of a result register.  Writing to temp/result regs is selected by + *     setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + *     The source/dest temp register fields have been extended by 1 bit, to + *     give a total of 32 temporary registers. + * + * Relative Addressing + *     NV40 can use an address register to index into vertex attribute regs. + *     This is done by putting the offset value into INPUT_SRC and setting + *     the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + *     There is a second condition code register on NV40, it's use is enabled + *     by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + *     TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT                                        (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE                        ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT                                       (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1                                 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1                                 (1 << 24) +#define NV40_VP_INST_SRC2_ABS                                          (1 << 23) +#define NV40_VP_INST_SRC1_ABS                                          (1 << 22) +#define NV40_VP_INST_SRC0_ABS                                          (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT                                      15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK                             (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE                                  (1 << 13) +#define NV40_VP_INST_COND_SHIFT                                               10 +#define NV40_VP_INST_COND_MASK                                       (0x7 << 10) +#    define NV40_VP_INST_COND_FL                                               0 +#    define NV40_VP_INST_COND_LT                                               1 +#    define NV40_VP_INST_COND_EQ                                               2 +#    define NV40_VP_INST_COND_LE                                               3 +#    define NV40_VP_INST_COND_GT                                               4 +#    define NV40_VP_INST_COND_NE                                               5 +#    define NV40_VP_INST_COND_GE                                               6 +#    define NV40_VP_INST_COND_TR                                               7 +#define NV40_VP_INST_COND_SWZ_X_SHIFT                                          8 
+#define NV40_VP_INST_COND_SWZ_X_MASK                                    (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT                                          6 +#define NV40_VP_INST_COND_SWZ_Y_MASK                                    (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT                                          4 +#define NV40_VP_INST_COND_SWZ_Z_MASK                                    (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT                                          2 +#define NV40_VP_INST_COND_SWZ_W_MASK                                    (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT                                        2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK                               (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT                                            0 +#define NV40_VP_INST_ADDR_SWZ_MASK                                   (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ +                NV40_VP_INST_INDEX_INPUT | \ +                NV40_VP_INST_COND_REG_SELECT_1 | \ +                NV40_VP_INST_ADDR_REG_SELECT_1 | \ +                NV40_VP_INST_SRC2_ABS | \ +                NV40_VP_INST_SRC1_ABS | \ +                NV40_VP_INST_SRC0_ABS | \ +                NV40_VP_INST_VEC_DEST_TEMP_MASK | \ +                NV40_VP_INST_COND_TEST_ENABLE | \ +                NV40_VP_INST_COND_MASK | \ +                NV40_VP_INST_COND_SWZ_ALL_MASK | \ +                NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT                                         22 +#define NV40_VP_INST_VEC_OPCODE_MASK                                (0x1F << 22) +#    define NV40_VP_INST_OP_NOP                                             0x00 +#    define NV40_VP_INST_OP_MOV                                             0x01 +#    define NV40_VP_INST_OP_MUL                                             0x02 +#    define NV40_VP_INST_OP_ADD                                             0x03 +#    define NV40_VP_INST_OP_MAD                                             0x04 +#    define NV40_VP_INST_OP_DP3                                             0x05 +#    define NV40_VP_INST_OP_DPH                                             0x06 +#    define NV40_VP_INST_OP_DP4                                             0x07 +#    define NV40_VP_INST_OP_DST                                             0x08 +#    define NV40_VP_INST_OP_MIN                                             0x09 +#    define NV40_VP_INST_OP_MAX                                             0x0A +#    define NV40_VP_INST_OP_SLT                                             0x0B +#    define NV40_VP_INST_OP_SGE                                             0x0C +#    define NV40_VP_INST_OP_ARL                                             0x0D +#    define NV40_VP_INST_OP_FRC                                             0x0E +#    define NV40_VP_INST_OP_FLR                                             0x0F +#    define NV40_VP_INST_OP_SEQ                                             0x10 +#    define NV40_VP_INST_OP_SFL                                             0x11 +#    define NV40_VP_INST_OP_SGT                                             0x12 +#    define NV40_VP_INST_OP_SLE                                             0x13 +#    define NV40_VP_INST_OP_SNE                                             0x14 +#    define NV40_VP_INST_OP_STR                                             0x15 +#    define NV40_VP_INST_OP_SSG                                             
0x16 +#    define NV40_VP_INST_OP_ARR                                             0x17 +#    define NV40_VP_INST_OP_ARA                                             0x18 +#    define NV40_VP_INST_OP_TXL                                             0x19 +#define NV40_VP_INST_SCA_OPCODE_SHIFT                                         27 +#define NV40_VP_INST_SCA_OPCODE_MASK                                (0x1F << 27) +#    define NV40_VP_INST_OP_NOP                                             0x00 +#    define NV40_VP_INST_OP_MOV                                             0x01 +#    define NV40_VP_INST_OP_RCP                                             0x02 +#    define NV40_VP_INST_OP_RCC                                             0x03 +#    define NV40_VP_INST_OP_RSQ                                             0x04 +#    define NV40_VP_INST_OP_EXP                                             0x05 +#    define NV40_VP_INST_OP_LOG                                             0x06 +#    define NV40_VP_INST_OP_LIT                                             0x07 +#    define NV40_VP_INST_OP_BRA                                             0x09 +#    define NV40_VP_INST_OP_CAL                                             0x0B +#    define NV40_VP_INST_OP_RET                                             0x0C +#    define NV40_VP_INST_OP_LG2                                             0x0D +#    define NV40_VP_INST_OP_EX2                                             0x0E +#    define NV40_VP_INST_OP_SIN                                             0x0F +#    define NV40_VP_INST_OP_COS                                             0x10 +#    define NV40_VP_INST_OP_PUSHA                                           0x13 +#    define NV40_VP_INST_OP_POPA                                            0x14 +#define NV40_VP_INST_CONST_SRC_SHIFT                                          12 +#define NV40_VP_INST_CONST_SRC_MASK                                 (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT                                           8 +#define NV40_VP_INST_INPUT_SRC_MASK                                  (0x0F << 8) +#    define NV40_VP_INST_IN_POS                                                0 +#    define NV40_VP_INST_IN_WEIGHT                                             1 +#    define NV40_VP_INST_IN_NORMAL                                             2 +#    define NV40_VP_INST_IN_COL0                                               3 +#    define NV40_VP_INST_IN_COL1                                               4 +#    define NV40_VP_INST_IN_FOGC                                               5 +#    define NV40_VP_INST_IN_TC0                                                8 +#    define NV40_VP_INST_IN_TC(n)                                          (8+n) +#define NV40_VP_INST_SRC0H_SHIFT                                               0 +#define NV40_VP_INST_SRC0H_MASK                                      (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ +                NV40_VP_INST_VEC_OPCODE_MASK | \ +                NV40_VP_INST_SCA_OPCODE_MASK | \ +                NV40_VP_INST_CONST_SRC_MASK  | \ +                NV40_VP_INST_INPUT_SRC_MASK  | \ +                NV40_VP_INST_SRC0H_MASK \ +                ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT                                              23 +#define NV40_VP_INST_SRC0L_MASK                                    (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT                                                6 
+#define NV40_VP_INST_SRC1_MASK                                    (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT                                               0 +#define NV40_VP_INST_SRC2H_MASK                                      (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT                                              0 +#define NV40_VP_INST_IADDRH_MASK                                     (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT                                             29 +#define NV40_VP_INST_IADDRL_MASK                                       (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT                                              21 +#define NV40_VP_INST_SRC2L_MASK                                    (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT                                      17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK                              (0xF << 17) +#    define NV40_VP_INST_SCA_WRITEMASK_X                               (1 << 20) +#    define NV40_VP_INST_SCA_WRITEMASK_Y                               (1 << 19) +#    define NV40_VP_INST_SCA_WRITEMASK_Z                               (1 << 18) +#    define NV40_VP_INST_SCA_WRITEMASK_W                               (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT                                      13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK                              (0xF << 13) +#    define NV40_VP_INST_VEC_WRITEMASK_X                               (1 << 16) +#    define NV40_VP_INST_VEC_WRITEMASK_Y                               (1 << 15) +#    define NV40_VP_INST_VEC_WRITEMASK_Z                               (1 << 14) +#    define NV40_VP_INST_VEC_WRITEMASK_W                               (1 << 13) +#define NV40_VP_INST_SCA_RESULT                                        (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT                                       7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK                              (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT                                                2 +#define NV40_VP_INST_DEST_MASK                                         (31 << 2) +#    define NV40_VP_INST_DEST_POS                                              0 +#    define NV40_VP_INST_DEST_COL0                                             1 +#    define NV40_VP_INST_DEST_COL1                                             2 +#    define NV40_VP_INST_DEST_BFC0                                             3 +#    define NV40_VP_INST_DEST_BFC1                                             4 +#    define NV40_VP_INST_DEST_FOGC                                             5 +#    define NV40_VP_INST_DEST_PSZ                                              6 +#    define NV40_VP_INST_DEST_TC0                                              7 +#    define NV40_VP_INST_DEST_TC(n)                                        (7+n) +#    define NV40_VP_INST_DEST_TEMP                                          0x1F +#define NV40_VP_INST_INDEX_CONST                                        (1 << 1) +#define NV40_VP_INST_LAST                                               (1 << 0) +#define NV40_VP_INST3_KNOWN ( \ +                NV40_VP_INST_SRC2L_MASK |\ +                NV40_VP_INST_SCA_WRITEMASK_MASK |\ +                NV40_VP_INST_VEC_WRITEMASK_MASK |\ +                NV40_VP_INST_SCA_DEST_TEMP_MASK |\ +                NV40_VP_INST_DEST_MASK |\ +                NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ 
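+/* Illustrative only: using these masks, a 17-bit source selection word src
+ * would be packed into the SRC0H field of DWORD 1 and the SRC0L field of
+ * DWORD 2 along the lines of:
+ *
+ *   hw[1] |= ((src & NV40_VP_SRC0_HIGH_MASK) >> NV40_VP_SRC0_HIGH_SHIFT)
+ *                                            << NV40_VP_INST_SRC0H_SHIFT;
+ *   hw[2] |= (src & NV40_VP_SRC0_LOW_MASK) << NV40_VP_INST_SRC0L_SHIFT;
+ *
+ * (hw[] being the four instruction DWORDs; SRC1 fits entirely in DWORD 2,
+ *  SRC2 is split across DWORDs 2 and 3 in the same fashion as SRC0.)
+ */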
+#define NV40_VP_SRC0_HIGH_SHIFT                                                9 +#define NV40_VP_SRC0_HIGH_MASK                                        0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK                                         0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT                                               11 +#define NV40_VP_SRC2_HIGH_MASK                                        0x0001F800 +#define NV40_VP_SRC2_LOW_MASK                                         0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE                                             (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT                                               14 +#define NV40_VP_SRC_SWZ_X_MASK                                         (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT                                               12 +#define NV40_VP_SRC_SWZ_Y_MASK                                         (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT                                               10 +#define NV40_VP_SRC_SWZ_Z_MASK                                         (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT                                                8 +#define NV40_VP_SRC_SWZ_W_MASK                                          (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT                                              8 +#define NV40_VP_SRC_SWZ_ALL_MASK                                     (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT                                             2 +#define NV40_VP_SRC_TEMP_SRC_MASK                                    (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT                                             0 +#define NV40_VP_SRC_REG_TYPE_MASK                                       (3 << 0) +#    define NV40_VP_SRC_REG_TYPE_UNK0                                          0 +#    define NV40_VP_SRC_REG_TYPE_TEMP                                          1 +#    define NV40_VP_SRC_REG_TYPE_INPUT                                         2 +#    define NV40_VP_SRC_REG_TYPE_CONST                                         3 + + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + *         0 - Opcode, output reg/mask, ATTRIB source + *         1 - Source 0 + *         2 - Source 1 + *         3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + *                 result.color == R0.xyzw + *                 result.depth == R1.z + * When the fragprog contains instructions to write depth, + * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + *  + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords.  As such instructions such as: + *  + *                 ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and + * SWIZZLE_ONE. 
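+ *
+ * For example, a non-native swizzle such as .xy01 is built up in a temp by
+ * src_native_swz() in nv40_fragprog.c:
+ *
+ *         MOV tmp.xy, src;        (native components)
+ *         SFL tmp.z, tmp;         (ZERO components)
+ *         STR tmp.w, tmp;         (ONE components)
+ *
+ * with per-component ExtSwizzle negation handled by an extra STR/MUL pair.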
+ * + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as + * SWIZZLE_ZERO is implemented simply by not writing to the relevant components + * of the destination. + * + * Looping + *   Loops appear to be fairly expensive on NV40 at least, the proprietary + *   driver goes to a lot of effort to avoid using the native looping + *   instructions.  If the total number of *executed* instructions between + *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + *   The maximum loop count is 255. + * + * Conditional execution + *   TODO + *  + * Non-native instructions: + *         LIT + *         LRP - MAD+MAD + *         SUB - ADD, negate second source + *         RSQ - LG2 + EX2 + *         POW - LG2 + MUL + EX2 + *         SCS - COS + SIN + *         XPD + *         DP2 - MUL + ADD + *         NRM + */ + +//== Opcode / Destination selection == +#define NV40_FP_OP_PROGRAM_END                                          (1 << 0) +#define NV40_FP_OP_OUT_REG_SHIFT                                               1 +#define NV40_FP_OP_OUT_REG_MASK                                        (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NV40_FP_OP_OUT_REG_HALF                                         (1 << 7) +#define NV40_FP_OP_COND_WRITE_ENABLE                                    (1 << 8) +#define NV40_FP_OP_OUTMASK_SHIFT                                               9 +#define NV40_FP_OP_OUTMASK_MASK                                       (0xF << 9) +#    define NV40_FP_OP_OUT_X                                            (1 << 9) +#    define NV40_FP_OP_OUT_Y                                            (1 <<10) +#    define NV40_FP_OP_OUT_Z                                            (1 <<11) +#    define NV40_FP_OP_OUT_W                                            (1 <<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. 
+ */ +#define NV40_FP_OP_INPUT_SRC_SHIFT                                            13 +#define NV40_FP_OP_INPUT_SRC_MASK                                     (15 << 13) +#    define NV40_FP_OP_INPUT_SRC_POSITION                                    0x0 +#    define NV40_FP_OP_INPUT_SRC_COL0                                        0x1 +#    define NV40_FP_OP_INPUT_SRC_COL1                                        0x2 +#    define NV40_FP_OP_INPUT_SRC_FOGC                                        0x3 +#    define NV40_FP_OP_INPUT_SRC_TC0                                         0x4 +#    define NV40_FP_OP_INPUT_SRC_TC(n)                                 (0x4 + n) +#    define NV40_FP_OP_INPUT_SRC_FACING                                      0xE +#define NV40_FP_OP_TEX_UNIT_SHIFT                                             17 +#define NV40_FP_OP_TEX_UNIT_MASK                                     (0xF << 17) +#define NV40_FP_OP_PRECISION_SHIFT                                            22 +#define NV40_FP_OP_PRECISION_MASK                                      (3 << 22) +#   define NV40_FP_PRECISION_FP32                                              0 +#   define NV40_FP_PRECISION_FP16                                              1 +#   define NV40_FP_PRECISION_FX12                                              2 +#define NV40_FP_OP_OPCODE_SHIFT                                               24 +#define NV40_FP_OP_OPCODE_MASK                                      (0x3F << 24) +#        define NV40_FP_OP_OPCODE_NOP                                       0x00 +#        define NV40_FP_OP_OPCODE_MOV                                       0x01 +#        define NV40_FP_OP_OPCODE_MUL                                       0x02 +#        define NV40_FP_OP_OPCODE_ADD                                       0x03 +#        define NV40_FP_OP_OPCODE_MAD                                       0x04 +#        define NV40_FP_OP_OPCODE_DP3                                       0x05 +#        define NV40_FP_OP_OPCODE_DP4                                       0x06 +#        define NV40_FP_OP_OPCODE_DST                                       0x07 +#        define NV40_FP_OP_OPCODE_MIN                                       0x08 +#        define NV40_FP_OP_OPCODE_MAX                                       0x09 +#        define NV40_FP_OP_OPCODE_SLT                                       0x0A +#        define NV40_FP_OP_OPCODE_SGE                                       0x0B +#        define NV40_FP_OP_OPCODE_SLE                                       0x0C +#        define NV40_FP_OP_OPCODE_SGT                                       0x0D +#        define NV40_FP_OP_OPCODE_SNE                                       0x0E +#        define NV40_FP_OP_OPCODE_SEQ                                       0x0F +#        define NV40_FP_OP_OPCODE_FRC                                       0x10 +#        define NV40_FP_OP_OPCODE_FLR                                       0x11 +#        define NV40_FP_OP_OPCODE_KIL                                       0x12 +#        define NV40_FP_OP_OPCODE_PK4B                                      0x13 +#        define NV40_FP_OP_OPCODE_UP4B                                      0x14 +/* DDX/DDY can only write to XY */ +#        define NV40_FP_OP_OPCODE_DDX                                       0x15 +#        define NV40_FP_OP_OPCODE_DDY                                       0x16 +#        define NV40_FP_OP_OPCODE_TEX                                       0x17 +#        define NV40_FP_OP_OPCODE_TXP                                
       0x18 +#        define NV40_FP_OP_OPCODE_TXD                                       0x19 +#        define NV40_FP_OP_OPCODE_RCP                                       0x1A +#        define NV40_FP_OP_OPCODE_EX2                                       0x1C +#        define NV40_FP_OP_OPCODE_LG2                                       0x1D +#        define NV40_FP_OP_OPCODE_STR                                       0x20 +#        define NV40_FP_OP_OPCODE_SFL                                       0x21 +#        define NV40_FP_OP_OPCODE_COS                                       0x22 +#        define NV40_FP_OP_OPCODE_SIN                                       0x23 +#        define NV40_FP_OP_OPCODE_PK2H                                      0x24 +#        define NV40_FP_OP_OPCODE_UP2H                                      0x25 +#        define NV40_FP_OP_OPCODE_PK4UB                                     0x27 +#        define NV40_FP_OP_OPCODE_UP4UB                                     0x28 +#        define NV40_FP_OP_OPCODE_PK2US                                     0x29 +#        define NV40_FP_OP_OPCODE_UP2US                                     0x2A +#        define NV40_FP_OP_OPCODE_DP2A                                      0x2E +#        define NV40_FP_OP_OPCODE_TXL                                       0x2F +#        define NV40_FP_OP_OPCODE_TXB                                       0x31 +#        define NV40_FP_OP_OPCODE_DIV                                       0x3A +#        define NV40_FP_OP_OPCODE_UNK_LIT                                   0x3C +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +#        define NV40_FP_OP_BRA_OPCODE_BRK                                    0x0 +#        define NV40_FP_OP_BRA_OPCODE_CAL                                    0x1 +#        define NV40_FP_OP_BRA_OPCODE_IF                                     0x2 +#        define NV40_FP_OP_BRA_OPCODE_LOOP                                   0x3 +#        define NV40_FP_OP_BRA_OPCODE_REP                                    0x4 +#        define NV40_FP_OP_BRA_OPCODE_RET                                    0x5 +#define NV40_FP_OP_OUT_SAT                                             (1 << 31) + +/* high order bits of SRC0 */ +#define NV40_FP_OP_OUT_ABS                                             (1 << 29) +#define NV40_FP_OP_COND_SWZ_W_SHIFT                                           27 +#define NV40_FP_OP_COND_SWZ_W_MASK                                     (3 << 27) +#define NV40_FP_OP_COND_SWZ_Z_SHIFT                                           25 +#define NV40_FP_OP_COND_SWZ_Z_MASK                                     (3 << 25) +#define NV40_FP_OP_COND_SWZ_Y_SHIFT                                           23 +#define NV40_FP_OP_COND_SWZ_Y_MASK                                     (3 << 23) +#define NV40_FP_OP_COND_SWZ_X_SHIFT                                           21 +#define NV40_FP_OP_COND_SWZ_X_MASK                                     (3 << 21) +#define NV40_FP_OP_COND_SWZ_ALL_SHIFT                                         21 +#define NV40_FP_OP_COND_SWZ_ALL_MASK                                (0xFF << 21) +#define NV40_FP_OP_COND_SHIFT                                                 18 +#define NV40_FP_OP_COND_MASK                                        (0x07 << 18) +#        define NV40_FP_OP_COND_FL                                             0 +#        define NV40_FP_OP_COND_LT                                             1 +#        define NV40_FP_OP_COND_EQ                               
              2 +#        define NV40_FP_OP_COND_LE                                             3 +#        define NV40_FP_OP_COND_GT                                             4 +#        define NV40_FP_OP_COND_NE                                             5 +#        define NV40_FP_OP_COND_GE                                             6 +#        define NV40_FP_OP_COND_TR                                             7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH                                      (1<<31) +#define NV40_FP_OP_DST_SCALE_SHIFT                                            28 +#define NV40_FP_OP_DST_SCALE_MASK                                      (3 << 28) +#define NV40_FP_OP_DST_SCALE_1X                                                0 +#define NV40_FP_OP_DST_SCALE_2X                                                1 +#define NV40_FP_OP_DST_SCALE_4X                                                2 +#define NV40_FP_OP_DST_SCALE_8X                                                3 +#define NV40_FP_OP_DST_SCALE_INV_2X                                            5 +#define NV40_FP_OP_DST_SCALE_INV_4X                                            6 +#define NV40_FP_OP_DST_SCALE_INV_8X                                            7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT                                            19 +#define NV40_FP_OP_LOOP_INCR_MASK                                   (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT                                           10 +#define NV40_FP_OP_LOOP_INDEX_MASK                                  (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT                                            2 +#define NV40_FP_OP_LOOP_COUNT_MASK                                   (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT                                               2 +#define NV40_FP_OP_ELSE_ID_MASK                                      (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT                                                 2 +#define NV40_FP_OP_IADDR_MASK                                        (0xFF << 2) + +/* SRC1 REP + *   I have no idea why there are 3 count values here..  but they + *   have always been filled with the same value in my tests so + *   far.. 
+ */ +#define NV40_FP_OP_REP_COUNT1_SHIFT                                            2 +#define NV40_FP_OP_REP_COUNT1_MASK                                   (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT                                           10 +#define NV40_FP_OP_REP_COUNT2_MASK                                  (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT                                           19 +#define NV40_FP_OP_REP_COUNT3_MASK                                  (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT                                                2 +#define NV40_FP_OP_END_ID_MASK                                       (0xFF << 2) + +// SRC2 high-order +#define NV40_FP_OP_INDEX_INPUT                                         (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT                                           19 +#define NV40_FP_OP_ADDR_INDEX_MASK                                   (0xF << 19) + +//== Register selection == +#define NV40_FP_REG_TYPE_SHIFT                                                 0 +#define NV40_FP_REG_TYPE_MASK                                           (3 << 0) +#        define NV40_FP_REG_TYPE_TEMP                                          0 +#        define NV40_FP_REG_TYPE_INPUT                                         1 +#        define NV40_FP_REG_TYPE_CONST                                         2 +#define NV40_FP_REG_SRC_SHIFT                                                  2 +#define NV40_FP_REG_SRC_MASK                                           (63 << 2) +#define NV40_FP_REG_SRC_HALF                                            (1 << 8) +#define NV40_FP_REG_SWZ_ALL_SHIFT                                              9 +#define NV40_FP_REG_SWZ_ALL_MASK                                      (255 << 9) +#define NV40_FP_REG_SWZ_X_SHIFT                                                9 +#define NV40_FP_REG_SWZ_X_MASK                                          (3 << 9) +#define NV40_FP_REG_SWZ_Y_SHIFT                                               11 +#define NV40_FP_REG_SWZ_Y_MASK                                         (3 << 11) +#define NV40_FP_REG_SWZ_Z_SHIFT                                               13 +#define NV40_FP_REG_SWZ_Z_MASK                                         (3 << 13) +#define NV40_FP_REG_SWZ_W_SHIFT                                               15 +#define NV40_FP_REG_SWZ_W_MASK                                         (3 << 15) +#        define NV40_FP_SWIZZLE_X                                              0 +#        define NV40_FP_SWIZZLE_Y                                              1 +#        define NV40_FP_SWIZZLE_Z                                              2 +#        define NV40_FP_SWIZZLE_W                                              3 +#define NV40_FP_REG_NEGATE                                             (1 << 17) + +#ifndef NV40_SHADER_NO_FUCKEDNESS +#define NV40SR_NONE	0 +#define NV40SR_OUTPUT	1 +#define NV40SR_INPUT	2 +#define NV40SR_TEMP	3 +#define NV40SR_CONST	4 + +struct nv40_sreg { +	int type; +	int index; + +	int dst_scale; + +	int negate; +	int abs; +	int swz[4]; + +	int cc_update; +	int cc_update_reg; +	int cc_test; +	int cc_test_reg; +	int cc_swz[4]; +}; + +static INLINE struct nv40_sreg +nv40_sr(int type, int index) +{ +	struct nv40_sreg temp = { +		.type = type, +		.index = index, +		.dst_scale = DEF_SCALE, +		.abs = 0, +		.negate = 0, +		.swz = { 0, 1, 2, 3 }, +		.cc_update = 0, +		.cc_update_reg = 0, +		.cc_test = DEF_CTEST, +		.cc_test_reg = 0, +		.cc_swz = { 0, 1, 2, 3 }, 
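+		/* Note: DEF_SCALE and DEF_CTEST are expected to be provided by
+		 * the file including this header -- presumably the no-op
+		 * defaults, i.e. the 1X destination scale and the TR (always
+		 * pass) condition. */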
+	}; +	return temp; +} + +static INLINE struct nv40_sreg +nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) +{ +	struct nv40_sreg dst = src; + +	dst.swz[SWZ_X] = src.swz[x]; +	dst.swz[SWZ_Y] = src.swz[y]; +	dst.swz[SWZ_Z] = src.swz[z]; +	dst.swz[SWZ_W] = src.swz[w]; +	return dst; +} + +static INLINE struct nv40_sreg +nv40_sr_neg(struct nv40_sreg src) +{ +	src.negate = !src.negate; +	return src; +} + +static INLINE struct nv40_sreg +nv40_sr_abs(struct nv40_sreg src) +{ +	src.abs = 1; +	return src; +} + +static INLINE struct nv40_sreg +nv40_sr_scale(struct nv40_sreg src, int scale) +{ +	src.dst_scale = scale; +	return src; +} +#endif + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c new file mode 100644 index 0000000000..2eff25aa83 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -0,0 +1,740 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +static void * +nv40_blend_state_create(struct pipe_context *pipe, +			const struct pipe_blend_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nouveau_grobj *curie = nv40->screen->curie; +	struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); +	struct nouveau_stateobj *so = so_new(16, 0); + +	if (cso->blend_enable) { +		so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); +		so_data  (so, 1); +		so_data  (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | +			       nvgl_blend_func(cso->rgb_src_factor)); +		so_data  (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | +			      nvgl_blend_func(cso->rgb_dst_factor)); +		so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); +		so_data  (so, nvgl_blend_eqn(cso->alpha_func) << 16 | +			      nvgl_blend_eqn(cso->rgb_func)); +	} else { +		so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_method(so, curie, NV40TCL_COLOR_MASK, 1); +	so_data  (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | +		       ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | +		       ((cso->colormask & PIPE_MASK_G) ? (0x01 <<  8) : 0) | +		       ((cso->colormask & PIPE_MASK_B) ? (0x01 <<  0) : 0))); + +	if (cso->logicop_enable) { +		so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); +		so_data  (so, 1); +		so_data  (so, nvgl_logicop_func(cso->logicop_func)); +	} else { +		so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); +	so_data  (so, cso->dither ? 
1 : 0); + +	so_ref(so, &bso->so); +	bso->pipe = *cso; +	return (void *)bso; +} + +static void +nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->blend = hwcso; +	nv40->dirty |= NV40_NEW_BLEND; +} + +static void +nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_blend_state *bso = hwcso; + +	so_ref(NULL, &bso->so); +	FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { +	unsigned ret; + +	switch (wrap) { +	case PIPE_TEX_WRAP_REPEAT: +		ret = NV40TCL_TEX_WRAP_S_REPEAT; +		break; +	case PIPE_TEX_WRAP_MIRROR_REPEAT: +		ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +		ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +		ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_CLAMP: +		ret = NV40TCL_TEX_WRAP_S_CLAMP; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP: +		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; +		break; +	default: +		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +		ret = NV40TCL_TEX_WRAP_S_REPEAT; +		break; +	} + +	return ret >> NV40TCL_TEX_WRAP_S_SHIFT; +} + +static void * +nv40_sampler_state_create(struct pipe_context *pipe, +			  const struct pipe_sampler_state *cso) +{ +	struct nv40_sampler_state *ps; +	uint32_t filter = 0; + +	ps = MALLOC(sizeof(struct nv40_sampler_state)); + +	ps->fmt = 0; +	if (!cso->normalized_coords) +		ps->fmt |= NV40TCL_TEX_FORMAT_RECT; + +	ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | +		    (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | +		    (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); + +	ps->en = 0; +	if (cso->max_anisotropy >= 2.0) { +		/* no idea, binary driver sets it, works without it.. meh.. 
*/ +		ps->wrap |= (1 << 5); + +		if (cso->max_anisotropy >= 16.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; +		} else +		if (cso->max_anisotropy >= 12.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; +		} else +		if (cso->max_anisotropy >= 10.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; +		} else +		if (cso->max_anisotropy >= 8.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; +		} else +		if (cso->max_anisotropy >= 6.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; +		} else +		if (cso->max_anisotropy >= 4.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; +		} else { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; +		} +	} + +	switch (cso->mag_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; +		break; +	} + +	switch (cso->min_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; +			break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; +			break; +		} +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; +		break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; +			break; +		} +		break; +	} + +	ps->filt = filter; + +	{ +		float limit; + +		limit = CLAMP(cso->lod_bias, -16.0, 15.0); +		ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + +		limit = CLAMP(cso->max_lod, 0.0, 15.0); +		ps->en |= (int)(limit * 256.0) << 7; + +		limit = CLAMP(cso->min_lod, 0.0, 15.0); +		ps->en |= (int)(limit * 256.0) << 19; +	} + + +	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +		switch (cso->compare_func) { +		case PIPE_FUNC_NEVER: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; +			break; +		case PIPE_FUNC_GREATER: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; +			break; +		case PIPE_FUNC_EQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; +			break; +		case PIPE_FUNC_GEQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; +			break; +		case PIPE_FUNC_LESS: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; +			break; +		case PIPE_FUNC_NOTEQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; +			break; +		case PIPE_FUNC_LEQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; +			break; +		case PIPE_FUNC_ALWAYS: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; +			break; +		default: +			break; +		} +	} + +	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | +		    (float_to_ubyte(cso->border_color[0]) << 16) | +		    (float_to_ubyte(cso->border_color[1]) <<  8) | +		    (float_to_ubyte(cso->border_color[2]) <<  0)); + +	return (void *)ps; +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv40->tex_sampler[unit] = sampler[unit]; +		nv40->dirty_samplers |= (1 << unit); +	} + +	for (unit = nr; unit < nv40->nr_samplers; unit++) { +		nv40->tex_sampler[unit] = NULL; +		nv40->dirty_samplers |= (1 << unit); +	} + +	nv40->nr_samplers = nr; +	nv40->dirty |= NV40_NEW_SAMPLER; 
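+	/* Units at and above the new sampler count were unbound in the second
+	 * loop above and flagged in dirty_samplers as well, so their state is
+	 * revalidated on the next draw instead of lingering from an earlier
+	 * bind. */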
+} + +static void +nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void +nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, +			 struct pipe_texture **miptree) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		pipe_texture_reference((struct pipe_texture **) +				       &nv40->tex_miptree[unit], miptree[unit]); +		nv40->dirty_samplers |= (1 << unit); +	} + +	for (unit = nr; unit < nv40->nr_textures; unit++) { +		pipe_texture_reference((struct pipe_texture **) +				       &nv40->tex_miptree[unit], NULL); +		nv40->dirty_samplers |= (1 << unit); +	} + +	nv40->nr_textures = nr; +	nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void * +nv40_rasterizer_state_create(struct pipe_context *pipe, +			     const struct pipe_rasterizer_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); +	struct nouveau_stateobj *so = so_new(32, 0); +	struct nouveau_grobj *curie = nv40->screen->curie; + +	/*XXX: ignored: +	 * 	light_twoside +	 * 	point_smooth -nohw +	 * 	multisample +	 */ + +	so_method(so, curie, NV40TCL_SHADE_MODEL, 1); +	so_data  (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : +				       NV40TCL_SHADE_MODEL_SMOOTH); + +	so_method(so, curie, NV40TCL_LINE_WIDTH, 2); +	so_data  (so, (unsigned char)(cso->line_width * 8.0) & 0xff); +	so_data  (so, cso->line_smooth ? 1 : 0); +	so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); +	so_data  (so, cso->line_stipple_enable ? 1 : 0); +	so_data  (so, (cso->line_stipple_pattern << 16) | +		       cso->line_stipple_factor); + +	so_method(so, curie, NV40TCL_POINT_SIZE, 1); +	so_data  (so, fui(cso->point_size)); + +	so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); +	if (cso->front_winding == PIPE_WINDING_CCW) { +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV40TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		} +		so_data(so, NV40TCL_FRONT_FACE_CCW); +	} else { +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV40TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		} +		so_data(so, NV40TCL_FRONT_FACE_CW); +	} +	so_data(so, cso->poly_smooth ? 1 : 0); +	so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + +	so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); +	so_data  (so, cso->poly_stipple_enable ? 
1 : 0); + +	so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if (cso->offset_cw || cso->offset_ccw) { +		so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); +		so_data  (so, fui(cso->offset_scale)); +		so_data  (so, fui(cso->offset_units * 2)); +	} + +	so_method(so, curie, NV40TCL_POINT_SPRITE, 1); +	if (cso->point_sprite) { +		unsigned psctl = (1 << 0), i; + +		for (i = 0; i < 8; i++) { +			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) +				psctl |= (1 << (8 + i)); +		} + +		so_data(so, psctl); +	} else { +		so_data(so, 0); +	} + +	so_ref(so, &rsso->so); +	rsso->pipe = *cso; +	return (void *)rsso; +} + +static void +nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->rasterizer = hwcso; +	nv40->dirty |= NV40_NEW_RAST; +	nv40->draw_dirty |= NV40_NEW_RAST; +} + +static void +nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_rasterizer_state *rsso = hwcso; + +	so_ref(NULL, &rsso->so); +	FREE(rsso); +} + +static void * +nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, +			const struct pipe_depth_stencil_alpha_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); +	struct nouveau_stateobj *so = so_new(32, 0); +	struct nouveau_grobj *curie = nv40->screen->curie; + +	so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); +	so_data  (so, nvgl_comparison_op(cso->depth.func)); +	so_data  (so, cso->depth.writemask ? 1 : 0); +	so_data  (so, cso->depth.enabled ? 1 : 0); + +	so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); +	so_data  (so, cso->alpha.enabled ? 1 : 0); +	so_data  (so, nvgl_comparison_op(cso->alpha.func)); +	so_data  (so, float_to_ubyte(cso->alpha.ref_value)); + +	if (cso->stencil[0].enabled) { +		so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); +		so_data  (so, cso->stencil[0].enabled ? 1 : 0); +		so_data  (so, cso->stencil[0].writemask); +		so_data  (so, nvgl_comparison_op(cso->stencil[0].func)); +		so_data  (so, cso->stencil[0].ref_value); +		so_data  (so, cso->stencil[0].valuemask); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); +	} else { +		so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); +		so_data  (so, 0); +	} + +	if (cso->stencil[1].enabled) { +		so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8); +		so_data  (so, cso->stencil[1].enabled ? 
1 : 0); +		so_data  (so, cso->stencil[1].writemask); +		so_data  (so, nvgl_comparison_op(cso->stencil[1].func)); +		so_data  (so, cso->stencil[1].ref_value); +		so_data  (so, cso->stencil[1].valuemask); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); +	} else { +		so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_ref(so, &zsaso->so); +	zsaso->pipe = *cso; +	return (void *)zsaso; +} + +static void +nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->zsa = hwcso; +	nv40->dirty |= NV40_NEW_ZSA; +} + +static void +nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_zsa_state *zsaso = hwcso; + +	so_ref(NULL, &zsaso->so); +	FREE(zsaso); +} + +static void * +nv40_vp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_vertex_program *vp; + +	vp = CALLOC(1, sizeof(struct nv40_vertex_program)); +	vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); +	vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); + +	return (void *)vp; +} + +static void +nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->vertprog = hwcso; +	nv40->dirty |= NV40_NEW_VERTPROG; +	nv40->draw_dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_vertex_program *vp = hwcso; + +	draw_delete_vertex_shader(nv40->draw, vp->draw); +	nv40_vertprog_destroy(nv40, vp); +	FREE((void*)vp->pipe.tokens); +	FREE(vp); +} + +static void * +nv40_fp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv40_fragment_program *fp; + +	fp = CALLOC(1, sizeof(struct nv40_fragment_program)); +	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + +	tgsi_scan_shader(fp->pipe.tokens, &fp->info); + +	return (void *)fp; +} + +static void +nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->fragprog = hwcso; +	nv40->dirty |= NV40_NEW_FRAGPROG; +} + +static void +nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_fragment_program *fp = hwcso; + +	nv40_fragprog_destroy(nv40, fp); +	FREE((void*)fp->pipe.tokens); +	FREE(fp); +} + +static void +nv40_set_blend_color(struct pipe_context *pipe, +		     const struct pipe_blend_color *bcol) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->blend_colour = *bcol; +	nv40->dirty |= NV40_NEW_BCOL; +} + +static void +nv40_set_clip_state(struct pipe_context *pipe, +		    const struct pipe_clip_state *clip) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->clip = *clip; +	nv40->dirty |= NV40_NEW_UCP; +	nv40->draw_dirty |= NV40_NEW_UCP; +} + +static void +nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +			 const struct pipe_constant_buffer *buf ) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->constbuf[shader] = buf->buffer; +	nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + +	if (shader == PIPE_SHADER_VERTEX) { +		nv40->dirty |= NV40_NEW_VERTPROG; +	} else +	if (shader == 
PIPE_SHADER_FRAGMENT) { +		nv40->dirty |= NV40_NEW_FRAGPROG; +	} +} + +static void +nv40_set_framebuffer_state(struct pipe_context *pipe, +			   const struct pipe_framebuffer_state *fb) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->framebuffer = *fb; +	nv40->dirty |= NV40_NEW_FB; +} + +static void +nv40_set_polygon_stipple(struct pipe_context *pipe, +			 const struct pipe_poly_stipple *stipple) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	memcpy(nv40->stipple, stipple->stipple, 4 * 32); +	nv40->dirty |= NV40_NEW_STIPPLE; +} + +static void +nv40_set_scissor_state(struct pipe_context *pipe, +		       const struct pipe_scissor_state *s) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->scissor = *s; +	nv40->dirty |= NV40_NEW_SCISSOR; +} + +static void +nv40_set_viewport_state(struct pipe_context *pipe, +			const struct pipe_viewport_state *vpt) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->viewport = *vpt; +	nv40->dirty |= NV40_NEW_VIEWPORT; +	nv40->draw_dirty |= NV40_NEW_VIEWPORT; +} + +static void +nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, +			const struct pipe_vertex_buffer *vb) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); +	nv40->vtxbuf_nr = count; + +	nv40->dirty |= NV40_NEW_ARRAYS; +	nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, +			 const struct pipe_vertex_element *ve) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); +	nv40->vtxelt_nr = count; + +	nv40->dirty |= NV40_NEW_ARRAYS; +	nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->edgeflags = bitfield; +	nv40->dirty |= NV40_NEW_ARRAYS; +	nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +void +nv40_init_state_functions(struct nv40_context *nv40) +{ +	nv40->pipe.create_blend_state = nv40_blend_state_create; +	nv40->pipe.bind_blend_state = nv40_blend_state_bind; +	nv40->pipe.delete_blend_state = nv40_blend_state_delete; + +	nv40->pipe.create_sampler_state = nv40_sampler_state_create; +	nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; +	nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; +	nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; + +	nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; +	nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; +	nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; + +	nv40->pipe.create_depth_stencil_alpha_state = +		nv40_depth_stencil_alpha_state_create; +	nv40->pipe.bind_depth_stencil_alpha_state = +		nv40_depth_stencil_alpha_state_bind; +	nv40->pipe.delete_depth_stencil_alpha_state = +		nv40_depth_stencil_alpha_state_delete; + +	nv40->pipe.create_vs_state = nv40_vp_state_create; +	nv40->pipe.bind_vs_state = nv40_vp_state_bind; +	nv40->pipe.delete_vs_state = nv40_vp_state_delete; + +	nv40->pipe.create_fs_state = nv40_fp_state_create; +	nv40->pipe.bind_fs_state = nv40_fp_state_bind; +	nv40->pipe.delete_fs_state = nv40_fp_state_delete; + +	nv40->pipe.set_blend_color = nv40_set_blend_color; +	nv40->pipe.set_clip_state = nv40_set_clip_state; +	nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; +	nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; +	nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; +	
nv40->pipe.set_scissor_state = nv40_set_scissor_state; +	nv40->pipe.set_viewport_state = nv40_set_viewport_state; + +	nv40->pipe.set_edgeflags = nv40_set_edgeflags; +	nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; +	nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h new file mode 100644 index 0000000000..9c55903ae3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -0,0 +1,91 @@ +#ifndef __NV40_STATE_H__ +#define __NV40_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv40_sampler_state { +	uint32_t fmt; +	uint32_t wrap; +	uint32_t en; +	uint32_t filt; +	uint32_t bcol; +}; + +struct nv40_vertex_program_exec { +	uint32_t data[4]; +	boolean has_branch_offset; +	int const_index; +}; + +struct nv40_vertex_program_data { +	int index; /* immediates == -1 */ +	float value[4]; +}; + +struct nv40_vertex_program { +	struct pipe_shader_state pipe; + +	struct draw_vertex_shader *draw; + +	boolean translated; + +	struct pipe_clip_state ucp; + +	struct nv40_vertex_program_exec *insns; +	unsigned nr_insns; +	struct nv40_vertex_program_data *consts; +	unsigned nr_consts; + +	struct nouveau_resource *exec; +	unsigned exec_start; +	struct nouveau_resource *data; +	unsigned data_start; +	unsigned data_start_min; + +	uint32_t ir; +	uint32_t or; +	uint32_t clip_ctrl; +	struct nouveau_stateobj *so; +}; + +struct nv40_fragment_program_data { +	unsigned offset; +	unsigned index; +}; + +struct nv40_fragment_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; + +	boolean translated; +	unsigned samplers; + +	uint32_t *insn; +	int       insn_len; + +	struct nv40_fragment_program_data *consts; +	unsigned nr_consts; + +	struct pipe_buffer *buffer; + +	uint32_t fp_control; +	struct nouveau_stateobj *so; +}; + +struct nv40_miptree { +	struct pipe_texture base; + +	struct pipe_buffer *buffer; +	uint total_size; + +	struct pipe_texture *shadow_tex; +	struct pipe_surface *shadow_surface; + +	struct { +		uint pitch; +		uint *image_offset; +	} level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c new file mode 100644 index 0000000000..95e6d7394f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -0,0 +1,40 @@ +#include "nv40_context.h" + +static boolean +nv40_state_blend_validate(struct nv40_context *nv40) +{ +	so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_blend = { +	.validate = nv40_state_blend_validate, +	.dirty = { +		.pipe = NV40_NEW_BLEND, +		.hw = NV40_STATE_BLEND +	} +}; + +static boolean +nv40_state_blend_colour_validate(struct nv40_context *nv40) +{ +	struct nouveau_stateobj *so = so_new(2, 0); +	struct pipe_blend_color *bcol = &nv40->blend_colour; + +	so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); +	so_data  (so, ((float_to_ubyte(bcol->color[3]) << 24) | +		       (float_to_ubyte(bcol->color[0]) << 16) | +		       (float_to_ubyte(bcol->color[1]) <<  8) | +		       (float_to_ubyte(bcol->color[2]) <<  0))); + +	so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_blend_colour = { +	.validate = nv40_state_blend_colour_validate, +	.dirty = { +		.pipe = NV40_NEW_BCOL, +		.hw = NV40_STATE_BCOL +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c new file 
mode 100644 index 0000000000..ce859def10 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -0,0 +1,184 @@ +#include "nv40_context.h" +#include "nv40_state.h" +#include "draw/draw_context.h" + +static struct nv40_state_entry *render_states[] = { +	&nv40_state_framebuffer, +	&nv40_state_rasterizer, +	&nv40_state_scissor, +	&nv40_state_stipple, +	&nv40_state_fragprog, +	&nv40_state_fragtex, +	&nv40_state_vertprog, +	&nv40_state_blend, +	&nv40_state_blend_colour, +	&nv40_state_zsa, +	&nv40_state_viewport, +	&nv40_state_vbo, +	NULL +}; + +static struct nv40_state_entry *swtnl_states[] = { +	&nv40_state_framebuffer, +	&nv40_state_rasterizer, +	&nv40_state_scissor, +	&nv40_state_stipple, +	&nv40_state_fragprog, +	&nv40_state_fragtex, +	&nv40_state_vertprog, +	&nv40_state_blend, +	&nv40_state_blend_colour, +	&nv40_state_zsa, +	&nv40_state_viewport, +	&nv40_state_vtxfmt, +	NULL +}; + +static void +nv40_state_do_validate(struct nv40_context *nv40, +		       struct nv40_state_entry **states) +{ +	const struct pipe_framebuffer_state *fb = &nv40->framebuffer; +	unsigned i; + +	for (i = 0; i < fb->nr_cbufs; i++) +		fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; +	if (fb->zsbuf) +		fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + +	while (*states) { +		struct nv40_state_entry *e = *states; + +		if (nv40->dirty & e->dirty.pipe) { +			if (e->validate(nv40)) +				nv40->state.dirty |= (1ULL << e->dirty.hw); +		} + +		states++; +	} +	nv40->dirty = 0; +} + +void +nv40_state_emit(struct nv40_context *nv40) +{ +	struct nv40_state *state = &nv40->state; +	struct nv40_screen *screen = nv40->screen; +	unsigned i, samplers; +	uint64_t states; + +	if (nv40->pctx_id != screen->cur_pctx) { +		for (i = 0; i < NV40_STATE_MAX; i++) { +			if (state->hw[i] && screen->state[i] != state->hw[i]) +				state->dirty |= (1ULL << i); +		} + +		screen->cur_pctx = nv40->pctx_id; +	} + +	for (i = 0, states = state->dirty; states; i++) { +		if (!(states & (1ULL << i))) +			continue; +		so_ref (state->hw[i], &nv40->screen->state[i]); +		if (state->hw[i]) +			so_emit(nv40->nvws, nv40->screen->state[i]); +		states &= ~(1ULL << i); +	} + +	if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | +			    (1ULL << NV40_STATE_FRAGTEX0))) { +		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); +		OUT_RING  (2); +		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); +		OUT_RING  (1); +	} + +	state->dirty = 0; + +	so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); +	for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { +		if (!(samplers & (1 << i))) +			continue; +		so_emit_reloc_markers(nv40->nvws, +				      state->hw[NV40_STATE_FRAGTEX0+i]); +		samplers &= ~(1ULL << i); +	} +	so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); +	if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) +		so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); +} + +boolean +nv40_state_validate(struct nv40_context *nv40) +{ +	boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; + +	if (nv40->render_mode != HW) { +		/* Don't even bother trying to go back to hw if none +		 * of the states that caused swtnl previously have changed. 
+		 */ +		if ((nv40->fallback_swtnl & nv40->dirty) +				!= nv40->fallback_swtnl) +			return FALSE; + +		/* Attempt to go to hwtnl again */ +		nv40->pipe.flush(&nv40->pipe, 0, NULL); +		nv40->dirty |= (NV40_NEW_VIEWPORT | +				NV40_NEW_VERTPROG | +				NV40_NEW_ARRAYS); +		nv40->render_mode = HW; +	} + +	nv40_state_do_validate(nv40, render_states); +	if (nv40->fallback_swtnl || nv40->fallback_swrast) +		return FALSE; +	 +	if (was_sw) +		NOUVEAU_ERR("swtnl->hw\n"); + +	return TRUE; +} + +boolean +nv40_state_validate_swtnl(struct nv40_context *nv40) +{ +	struct draw_context *draw = nv40->draw; + +	/* Setup for swtnl */ +	if (nv40->render_mode == HW) { +		NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); +		nv40->pipe.flush(&nv40->pipe, 0, NULL); +		nv40->dirty |= (NV40_NEW_VIEWPORT | +				NV40_NEW_VERTPROG | +				NV40_NEW_ARRAYS); +		nv40->render_mode = SWTNL; +	} + +	if (nv40->draw_dirty & NV40_NEW_VERTPROG) +		draw_bind_vertex_shader(draw, nv40->vertprog->draw); + +	if (nv40->draw_dirty & NV40_NEW_RAST) +		draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); + +	if (nv40->draw_dirty & NV40_NEW_UCP) +		draw_set_clip_state(draw, &nv40->clip); + +	if (nv40->draw_dirty & NV40_NEW_VIEWPORT) +		draw_set_viewport_state(draw, &nv40->viewport); + +	if (nv40->draw_dirty & NV40_NEW_ARRAYS) { +		draw_set_edgeflags(draw, nv40->edgeflags); +		draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); +		draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);	 +	} + +	nv40_state_do_validate(nv40, swtnl_states); +	if (nv40->fallback_swrast) { +		NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); +		return FALSE; +	} + +	nv40->draw_dirty = 0; +	return TRUE; +} + diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c new file mode 100644 index 0000000000..454abad31f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -0,0 +1,162 @@ +#include "nv40_context.h" +#include "nouveau/nouveau_util.h" + +static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surface) +{ +	struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; +	return mt->buffer; +} + +static boolean +nv40_state_framebuffer_validate(struct nv40_context *nv40) +{ +	struct pipe_framebuffer_state *fb = &nv40->framebuffer; +	struct pipe_surface *rt[4], *zeta; +	uint32_t rt_enable, rt_format; +	int i, colour_format = 0, zeta_format = 0; +	struct nouveau_stateobj *so = so_new(64, 10); +	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; +	unsigned w = fb->width; +	unsigned h = fb->height; + +	rt_enable = 0; +	for (i = 0; i < fb->nr_cbufs; i++) { +		if (colour_format) { +			assert(colour_format == fb->cbufs[i]->format); +		} else { +			colour_format = fb->cbufs[i]->format; +			rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); +			rt[i] = fb->cbufs[i]; +		} +	} + +	if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | +			 NV40TCL_RT_ENABLE_COLOR3)) +		rt_enable |= NV40TCL_RT_ENABLE_MRT; + +	if (fb->zsbuf) { +		zeta_format = fb->zsbuf->format; +		zeta = fb->zsbuf; +	} + +	if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); +		for (i = 1; i < fb->nr_cbufs; i++) +			assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + +		rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED | +		            log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT | +		            log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT; +	} +	else +		
rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; + +	switch (colour_format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case 0: +		rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; +		break; +	case PIPE_FORMAT_R5G6B5_UNORM: +		rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; +		break; +	default: +		assert(0); +	} + +	switch (zeta_format) { +	case PIPE_FORMAT_Z16_UNORM: +		rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; +		break; +	case PIPE_FORMAT_Z24S8_UNORM: +	case 0: +		rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; +		break; +	default: +		assert(0); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); +		so_reloc (so, nv40_surface_buffer(rt[0]), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); +		so_data  (so, rt[0]->stride); +		so_reloc (so, nv40_surface_buffer(rt[0]), rt[0]->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); +		so_reloc (so, nv40_surface_buffer(rt[1]), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); +		so_reloc (so, nv40_surface_buffer(rt[1]), rt[1]->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_data  (so, rt[1]->stride); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); +		so_reloc (so, nv40_surface_buffer(rt[2]), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); +		so_reloc (so, nv40_surface_buffer(rt[2]), rt[2]->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); +		so_data  (so, rt[2]->stride); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); +		so_reloc (so, nv40_surface_buffer(rt[3]), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); +		so_reloc (so, nv40_surface_buffer(rt[3]), rt[3]->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); +		so_data  (so, rt[3]->stride); +	} + +	if (zeta_format) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); +		so_reloc (so, nv40_surface_buffer(zeta), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); +		so_reloc (so, nv40_surface_buffer(zeta), zeta->offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); +		so_data  (so, zeta->stride); +	} + +	so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1); +	so_data  (so, rt_enable); +	so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3); +	so_data  (so, (w << 16) | 0); +	so_data  (so, (h << 16) | 0); +	so_data  (so, rt_format); +	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2); +	so_data  (so, (w << 16) | 0); +	so_data  (so, (h << 16) | 0); +	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); +	so_data  (so, ((w - 1) << 16) | 0); +	so_data  (so, ((h - 1) << 16) | 0); 
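+	/* The width/height words above pack the size into the high 16 bits;
+	 * the low half (zero here) is presumably the x/y origin, so e.g. a
+	 * 640x480 target would be emitted as (640 << 16) | 0 and (480 << 16) | 0.
+	 * VIEWPORT_CLIP appears to take an inclusive maximum, hence the
+	 * (w - 1) / (h - 1) values. */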
+	so_method(so, nv40->screen->curie, 0x1d88, 1); +	so_data  (so, (1 << 12) | h); + +	so_ref(so, &nv40->state.hw[NV40_STATE_FB]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_framebuffer = { +	.validate = nv40_state_framebuffer_validate, +	.dirty = { +		.pipe = NV40_NEW_FB, +		.hw = NV40_STATE_FB +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c new file mode 100644 index 0000000000..9ecda5990f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_rasterizer_validate(struct nv40_context *nv40) +{ +	so_ref(nv40->rasterizer->so, +	       &nv40->state.hw[NV40_STATE_RAST]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_rasterizer = { +	.validate = nv40_state_rasterizer_validate, +	.dirty = { +		.pipe = NV40_NEW_RAST, +		.hw = NV40_STATE_RAST +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c new file mode 100644 index 0000000000..285239ef41 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv40_context.h" + +static boolean +nv40_state_scissor_validate(struct nv40_context *nv40) +{ +	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; +	struct pipe_scissor_state *s = &nv40->scissor; +	struct nouveau_stateobj *so; + +	if (nv40->state.hw[NV40_STATE_SCISSOR] && +	    (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) +		return FALSE; +	nv40->state.scissor_enabled = rast->scissor; + +	so = so_new(3, 0); +	so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); +	if (nv40->state.scissor_enabled) { +		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx); +		so_data  (so, ((s->maxy - s->miny) << 16) | s->miny); +	} else { +		so_data  (so, 4096 << 16); +		so_data  (so, 4096 << 16); +	} + +	so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_scissor = { +	.validate = nv40_state_scissor_validate, +	.dirty = { +		.pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, +		.hw = NV40_STATE_SCISSOR +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c new file mode 100644 index 0000000000..b51024ad9b --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv40_context.h" + +static boolean +nv40_state_stipple_validate(struct nv40_context *nv40) +{ +	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; +	struct nouveau_grobj *curie = nv40->screen->curie; +	struct nouveau_stateobj *so; + +	if (nv40->state.hw[NV40_STATE_STIPPLE] && +	   (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) +		return FALSE; + +	if (rast->poly_stipple_enable) { +		unsigned i; + +		so = so_new(35, 0); +		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); +		so_data  (so, 1); +		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); +		for (i = 0; i < 32; i++) +			so_data(so, nv40->stipple[i]); +	} else { +		so = so_new(2, 0); +		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_stipple = { +	.validate = nv40_state_stipple_validate, +	.dirty = { +		.pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, +		.hw = NV40_STATE_STIPPLE, +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c 
b/src/gallium/drivers/nv40/nv40_state_viewport.c new file mode 100644 index 0000000000..869a55b405 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -0,0 +1,67 @@ +#include "nv40_context.h" + +static boolean +nv40_state_viewport_validate(struct nv40_context *nv40) +{ +	struct pipe_viewport_state *vpt = &nv40->viewport; +	struct nouveau_stateobj *so; +	unsigned bypass; + +	if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping) +		bypass = 0; +	else +		bypass = 1; + +	if (nv40->state.hw[NV40_STATE_VIEWPORT] && +	    (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) && +	    nv40->state.viewport_bypass == bypass) +		return FALSE; +	nv40->state.viewport_bypass = bypass; + +	so = so_new(11, 0); +	if (!bypass) { +		so_method(so, nv40->screen->curie, +			  NV40TCL_VIEWPORT_TRANSLATE_X, 8); +		so_data  (so, fui(vpt->translate[0])); +		so_data  (so, fui(vpt->translate[1])); +		so_data  (so, fui(vpt->translate[2])); +		so_data  (so, fui(vpt->translate[3])); +		so_data  (so, fui(vpt->scale[0])); +		so_data  (so, fui(vpt->scale[1])); +		so_data  (so, fui(vpt->scale[2])); +		so_data  (so, fui(vpt->scale[3])); +		so_method(so, nv40->screen->curie, 0x1d78, 1); +		so_data  (so, 1); +	} else { +		so_method(so, nv40->screen->curie, +			  NV40TCL_VIEWPORT_TRANSLATE_X, 8); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(0.0)); +		so_data  (so, fui(1.0)); +		so_data  (so, fui(1.0)); +		so_data  (so, fui(1.0)); +		so_data  (so, fui(0.0)); +		/* Not entirely certain what this is yet.  The DDX uses this +		 * value also as it fixes rendering when you pass +		 * pre-transformed vertices to the GPU.  My best gusss is that +		 * this bypasses some culling/clipping stage.  Might be worth +		 * noting that points/lines are uneffected by whatever this +		 * value fixes, only filled polygons are effected. +		 */ +		so_method(so, nv40->screen->curie, 0x1d78, 1); +		so_data  (so, 0x110); +	} + +	so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_viewport = { +	.validate = nv40_state_viewport_validate, +	.dirty = { +		.pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST, +		.hw = NV40_STATE_VIEWPORT +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c new file mode 100644 index 0000000000..fb760677c8 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_zsa_validate(struct nv40_context *nv40) +{ +	so_ref(nv40->zsa->so, +	       &nv40->state.hw[NV40_STATE_ZSA]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_zsa = { +	.validate = nv40_state_zsa_validate, +	.dirty = { +		.pipe = NV40_NEW_ZSA, +		.hw = NV40_STATE_ZSA +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c new file mode 100644 index 0000000000..c4a5fb20d9 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "nv40_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv40_surface_copy(struct pipe_context *pipe, boolean do_flip, +		  struct pipe_surface *dest, unsigned destx, unsigned desty, +		  struct pipe_surface *src, unsigned srcx, unsigned srcy, +		  unsigned width, unsigned height) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + +	if (do_flip) { +		desty += height; +		while (height--) { +			eng2d->copy(eng2d, dest, destx, desty--, src, +				    srcx, srcy++, width, 1); +		} +		return; +	} + +	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +		  unsigned destx, unsigned desty, unsigned width, +		  unsigned height, unsigned value) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + +	eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv40_init_surface_functions(struct nv40_context *nv40) +{ +	nv40->pipe.surface_copy = nv40_surface_copy; +	nv40->pipe.surface_fill = nv40_surface_fill; +} diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c new file mode 100644 index 0000000000..8f1834628f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -0,0 +1,555 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ +	switch (pipe) { +	case PIPE_FORMAT_R32_FLOAT: +	case PIPE_FORMAT_R32G32_FLOAT: +	case PIPE_FORMAT_R32G32B32_FLOAT: +	case PIPE_FORMAT_R32G32B32A32_FLOAT: +		*fmt = NV40TCL_VTXFMT_TYPE_FLOAT; +		break; +	case PIPE_FORMAT_R8_UNORM: +	case PIPE_FORMAT_R8G8_UNORM: +	case PIPE_FORMAT_R8G8B8_UNORM: +	case PIPE_FORMAT_R8G8B8A8_UNORM: +		*fmt = NV40TCL_VTXFMT_TYPE_UBYTE; +		break; +	case PIPE_FORMAT_R16_SSCALED: +	case PIPE_FORMAT_R16G16_SSCALED: +	case PIPE_FORMAT_R16G16B16_SSCALED: +	case 
PIPE_FORMAT_R16G16B16A16_SSCALED: +		*fmt = NV40TCL_VTXFMT_TYPE_USHORT; +		break; +	default: +		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); +		return 1; +	} + +	switch (pipe) { +	case PIPE_FORMAT_R8_UNORM: +	case PIPE_FORMAT_R32_FLOAT: +	case PIPE_FORMAT_R16_SSCALED: +		*ncomp = 1; +		break; +	case PIPE_FORMAT_R8G8_UNORM: +	case PIPE_FORMAT_R32G32_FLOAT: +	case PIPE_FORMAT_R16G16_SSCALED: +		*ncomp = 2; +		break; +	case PIPE_FORMAT_R8G8B8_UNORM: +	case PIPE_FORMAT_R32G32B32_FLOAT: +	case PIPE_FORMAT_R16G16B16_SSCALED: +		*ncomp = 3; +		break; +	case PIPE_FORMAT_R8G8B8A8_UNORM: +	case PIPE_FORMAT_R32G32B32A32_FLOAT: +	case PIPE_FORMAT_R16G16B16A16_SSCALED: +		*ncomp = 4; +		break; +	default: +		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); +		return 1; +	} + +	return 0; +} + +static boolean +nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, +		    unsigned ib_size) +{ +	struct pipe_screen *pscreen = &nv40->screen->pipe; +	unsigned type; + +	if (!ib) { +		nv40->idxbuf = NULL; +		nv40->idxbuf_format = 0xdeadbeef; +		return FALSE; +	} + +	if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) +		return FALSE; + +	switch (ib_size) { +	case 2: +		type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; +		break; +	case 4: +		type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; +		break; +	default: +		return FALSE; +	} + +	if (ib != nv40->idxbuf || +	    type != nv40->idxbuf_format) { +		nv40->dirty |= NV40_NEW_ARRAYS; +		nv40->idxbuf = ib; +		nv40->idxbuf_format = type; +	} + +	return TRUE; +} + +static boolean +nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, +		       int attrib, struct pipe_vertex_element *ve, +		       struct pipe_vertex_buffer *vb) +{ +	struct pipe_winsys *ws = nv40->pipe.winsys; +	struct nouveau_grobj *curie = nv40->screen->curie; +	unsigned type, ncomp; +	void *map; + +	if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) +		return FALSE; + +	map  = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); +	map += vb->buffer_offset + ve->src_offset; + +	switch (type) { +	case NV40TCL_VTXFMT_TYPE_FLOAT: +	{ +		float *v = map; + +		switch (ncomp) { +		case 4: +			so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			so_data  (so, fui(v[2])); +			so_data  (so, fui(v[3])); +			break; +		case 3: +			so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			so_data  (so, fui(v[2])); +			break; +		case 2: +			so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			break; +		case 1: +			so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); +			so_data  (so, fui(v[0])); +			break; +		default: +			ws->buffer_unmap(ws, vb->buffer); +			return FALSE; +		} +	} +		break; +	default: +		ws->buffer_unmap(ws, vb->buffer); +		return FALSE; +	} + +	ws->buffer_unmap(ws, vb->buffer); + +	return TRUE; +} + +boolean +nv40_draw_arrays(struct pipe_context *pipe, +		 unsigned mode, unsigned start, unsigned count) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nouveau_channel *chan = nv40->nvws->channel; +	unsigned restart; + +	nv40_vbo_set_idxbuf(nv40, NULL, 0); +	if (FORCE_SWTNL || !nv40_state_validate(nv40)) { +		return nv40_draw_elements_swtnl(pipe, NULL, 0, +						mode, start, count); +	} + +	while (count) { +		unsigned vc, nr; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, +					mode, start, 
count, &restart); +		if (!vc) { +			FIRE_RING(NULL); +			continue; +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		nr = (vc & 0xff); +		if (nr) { +			BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); +			OUT_RING  (((nr - 1) << 24) | start); +			start += nr; +		} + +		nr = vc >> 8; +		while (nr) { +			unsigned push = nr > 2047 ? 2047 : nr; + +			nr -= push; + +			BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); +			while (push--) { +				OUT_RING(((0x100 - 1) << 24) | start); +				start += 0x100; +			} +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		count -= vc; +		start = restart; +	} + +	pipe->flush(pipe, 0, NULL); +	return TRUE; +} + +static INLINE void +nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv40->nvws->channel; + +	while (count) { +		uint8_t *elts = (uint8_t *)ib + start; +		unsigned vc, push, restart; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		if (vc & 1) { +			BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); +			OUT_RING  (elts[0]); +			elts++; vc--; +		} + +		while (vc) { +			unsigned i; + +			push = MIN2(vc, 2047 * 2); + +			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); +			for (i = 0; i < push; i+=2) +				OUT_RING((elts[i+1] << 16) | elts[i]); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; +	} +} + +static INLINE void +nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv40->nvws->channel; + +	while (count) { +		uint16_t *elts = (uint16_t *)ib + start; +		unsigned vc, push, restart; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		if (vc & 1) { +			BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); +			OUT_RING  (elts[0]); +			elts++; vc--; +		} + +		while (vc) { +			unsigned i; + +			push = MIN2(vc, 2047 * 2); + +			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); +			for (i = 0; i < push; i+=2) +				OUT_RING((elts[i+1] << 16) | elts[i]); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; +	} +} + +static INLINE void +nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv40->nvws->channel; + +	while (count) { +		uint32_t *elts = (uint32_t *)ib + start; +		unsigned vc, push, restart; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		while (vc) { +			push = MIN2(vc, 2047); + +			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); +			OUT_RINGp    (elts, push); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(curie, 
NV40TCL_BEGIN_END, 1);
+		OUT_RING  (0);
+
+		start = restart;
+	}
+}
+
+static boolean
+nv40_draw_elements_inline(struct pipe_context *pipe,
+			  struct pipe_buffer *ib, unsigned ib_size,
+			  unsigned mode, unsigned start, unsigned count)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	struct pipe_winsys *ws = pipe->winsys;
+	void *map;
+
+	map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
+	if (!map) {
+		NOUVEAU_ERR("failed mapping ib\n");
+		return FALSE;
+	}
+
+	switch (ib_size) {
+	case 1:
+		nv40_draw_elements_u08(nv40, map, mode, start, count);
+		break;
+	case 2:
+		nv40_draw_elements_u16(nv40, map, mode, start, count);
+		break;
+	case 4:
+		nv40_draw_elements_u32(nv40, map, mode, start, count);
+		break;
+	default:
+		NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size);
+		break;
+	}
+
+	ws->buffer_unmap(ws, ib);
+	return TRUE;
+}
+
+static boolean
+nv40_draw_elements_vbo(struct pipe_context *pipe,
+		       unsigned mode, unsigned start, unsigned count)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	struct nouveau_channel *chan = nv40->nvws->channel;
+	unsigned restart;
+
+	while (count) {
+		unsigned nr, vc;
+
+		nv40_state_emit(nv40);
+
+		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
+					mode, start, count, &restart);
+		if (!vc) {
+			FIRE_RING(NULL);
+			continue;
+		}
+		
+		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (nvgl_primitive(mode));
+
+		nr = (vc & 0xff);
+		if (nr) {
+			BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
+			OUT_RING  (((nr - 1) << 24) | start);
+			start += nr;
+		}
+
+		nr = vc >> 8;
+		while (nr) {
+			unsigned push = nr > 2047 ? 2047 : nr;
+
+			nr -= push;
+
+			BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+			while (push--) {
+				OUT_RING(((0x100 - 1) << 24) | start);
+				start += 0x100;
+			}
+		}
+
+		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (0);
+
+		count -= vc;
+		start = restart;
+	}
+
+	return TRUE;
+}
+
+boolean
+nv40_draw_elements(struct pipe_context *pipe,
+		   struct pipe_buffer *indexBuffer, unsigned indexSize,
+		   unsigned mode, unsigned start, unsigned count)
+{
+	struct nv40_context *nv40 = nv40_context(pipe);
+	boolean idxbuf;
+
+	idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
+	if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+		return nv40_draw_elements_swtnl(pipe, NULL, 0,
+						mode, start, count);
+	}
+
+	if (idxbuf) {
+		nv40_draw_elements_vbo(pipe, mode, start, count);
+	} else {
+		nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
+					  mode, start, count);
+	}
+
+	pipe->flush(pipe, 0, NULL);
+	return TRUE;
+}
+
+static boolean
+nv40_vbo_validate(struct nv40_context *nv40)
+{
+	struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL;
+	struct nouveau_grobj *curie = nv40->screen->curie;
+	struct pipe_buffer *ib = nv40->idxbuf;
+	unsigned ib_format = nv40->idxbuf_format;
+	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+	int hw;
+
+	if (nv40->edgeflags) {
+		nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
+		return FALSE;
+	}
+
+	vtxbuf = so_new(20, 18);
+	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
+	vtxfmt = so_new(17, 0);
+	so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
+
+	for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
+		struct pipe_vertex_element *ve;
+		struct pipe_vertex_buffer *vb;
+		unsigned type, ncomp;
+
+		ve = &nv40->vtxelt[hw];
+		vb = &nv40->vtxbuf[ve->vertex_buffer_index];
+
+		if (!vb->stride) {
+			if (!sattr)
+				sattr = so_new(16 * 5, 0);
+
+			if 
(nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { +				so_data(vtxbuf, 0); +				so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); +				continue; +			} +		} + +		if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { +			nv40->fallback_swtnl |= NV40_NEW_ARRAYS; +			so_ref(NULL, &vtxbuf); +			so_ref(NULL, &vtxfmt); +			return FALSE; +		} + +		so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, +			 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, +			 0, NV40TCL_VTXBUF_ADDRESS_DMA1); +		so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) | +				  (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); +	} + +	if (ib) { +		so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); +		so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); +		so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, +			  0, NV40TCL_IDXBUF_FORMAT_DMA1); +	} + +	so_method(vtxbuf, curie, 0x1710, 1); +	so_data  (vtxbuf, 0); + +	so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); +	nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); +	so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); +	nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); +	so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); +	nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); +	return FALSE; +} + +struct nv40_state_entry nv40_state_vbo = { +	.validate = nv40_vbo_validate, +	.dirty = { +		.pipe = NV40_NEW_ARRAYS, +		.hw = 0, +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c new file mode 100644 index 0000000000..0862386638 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -0,0 +1,1070 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +/* TODO (at least...): + *  1. Indexed consts  + ARL + *  3. NV_vp11, NV_vp2, NV_vp3 features + *       - extra arith opcodes + *       - branching + *       - texture sampling + *       - indexed attribs + *       - indexed results + *  4. 
bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) + +#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + +struct nv40_vpc { +	struct nv40_vertex_program *vp; + +	struct nv40_vertex_program_exec *vpi; + +	unsigned r_temps; +	unsigned r_temps_discard; +	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; +	struct nv40_sreg *r_address; +	struct nv40_sreg *r_temp; + +	struct nv40_sreg *imm; +	unsigned nr_imm; + +	unsigned hpos_idx; +}; + +static struct nv40_sreg +temp(struct nv40_vpc *vpc) +{ +	int idx = ffs(~vpc->r_temps) - 1; + +	if (idx < 0) { +		NOUVEAU_ERR("out of temps!!\n"); +		assert(0); +		return nv40_sr(NV40SR_TEMP, 0); +	} + +	vpc->r_temps |= (1 << idx); +	vpc->r_temps_discard |= (1 << idx); +	return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_vpc *vpc) +{ +	vpc->r_temps &= ~vpc->r_temps_discard; +	vpc->r_temps_discard = 0; +} + +static struct nv40_sreg +constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) +{ +	struct nv40_vertex_program *vp = vpc->vp; +	struct nv40_vertex_program_data *vpd; +	int idx; + +	if (pipe >= 0) { +		for (idx = 0; idx < vp->nr_consts; idx++) { +			if (vp->consts[idx].index == pipe) +				return nv40_sr(NV40SR_CONST, idx); +		} +	} + +	idx = vp->nr_consts++; +	vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); +	vpd = &vp->consts[idx]; + +	vpd->index = pipe; +	vpd->value[0] = x; +	vpd->value[1] = y; +	vpd->value[2] = z; +	vpd->value[3] = w; +	return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ +	nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ +	struct nv40_vertex_program *vp = vpc->vp; +	uint32_t sr = 0; + +	switch (src.type) { +	case NV40SR_TEMP: +		sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); +		sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); +		break; +	case NV40SR_INPUT: +		sr |= (NV40_VP_SRC_REG_TYPE_INPUT << +		       NV40_VP_SRC_REG_TYPE_SHIFT); +		vp->ir |= (1 << src.index); +		hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); +		break; +	case NV40SR_CONST: +		sr |= (NV40_VP_SRC_REG_TYPE_CONST << +		       NV40_VP_SRC_REG_TYPE_SHIFT); +		assert(vpc->vpi->const_index == -1 || +		       vpc->vpi->const_index == src.index); +		vpc->vpi->const_index = src.index; +		break; +	case NV40SR_NONE: +		sr |= (NV40_VP_SRC_REG_TYPE_INPUT << +		       NV40_VP_SRC_REG_TYPE_SHIFT); +		break; +	default: +		assert(0); +	} + +	if (src.negate) +		sr |= NV40_VP_SRC_NEGATE; + +	if (src.abs) +		hw[0] |= (1 << (21 + pos)); + +	sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | +	       (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | +	       (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | +	       (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + +	switch (pos) { +	case 0: +		hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> +			  NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; +		hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << +			  NV40_VP_INST_SRC0L_SHIFT; +		break; +	case 1: +		hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; +		break; +	case 2: +		hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> +			  
NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; +		hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << +			  NV40_VP_INST_SRC2L_SHIFT; +		break; +	default: +		assert(0); +	} +} + +static void +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +{ +	struct nv40_vertex_program *vp = vpc->vp; + +	switch (dst.type) { +	case NV40SR_TEMP: +		hw[3] |= NV40_VP_INST_DEST_MASK; +		if (slot == 0) { +			hw[0] |= (dst.index << +				  NV40_VP_INST_VEC_DEST_TEMP_SHIFT); +		} else { +			hw[3] |= (dst.index <<  +				  NV40_VP_INST_SCA_DEST_TEMP_SHIFT); +		} +		break; +	case NV40SR_OUTPUT: +		switch (dst.index) { +		case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; +		case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; +		case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; +		case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; +		case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; +		case NV40_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break; +		case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; +		case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; +		case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; +		case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; +		case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; +		case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; +		case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; +		case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; +		case NV40_VP_INST_DEST_CLIP(0): +			vp->or |= (1 << 6); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; +			dst.index = NV40_VP_INST_DEST_FOGC; +			break; +		case NV40_VP_INST_DEST_CLIP(1): +			vp->or |= (1 << 7); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; +			dst.index = NV40_VP_INST_DEST_FOGC; +			break; +		case NV40_VP_INST_DEST_CLIP(2): +			vp->or |= (1 << 8); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; +			dst.index = NV40_VP_INST_DEST_FOGC; +			break; +		case NV40_VP_INST_DEST_CLIP(3): +			vp->or |= (1 << 9); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; +			dst.index = NV40_VP_INST_DEST_PSZ; +			break; +		case NV40_VP_INST_DEST_CLIP(4): +			vp->or |= (1 << 10); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; +			dst.index = NV40_VP_INST_DEST_PSZ; +			break; +		case NV40_VP_INST_DEST_CLIP(5): +			vp->or |= (1 << 11); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; +			dst.index = NV40_VP_INST_DEST_PSZ; +			break; +		default: +			break; +		} + +		hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); +		if (slot == 0) { +			hw[0] |= NV40_VP_INST_VEC_RESULT; +			hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); +		} else { +			hw[3] |= NV40_VP_INST_SCA_RESULT; +			hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; +		} +		break; +	default: +		assert(0); +	} +} + +static void +nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, +	      struct nv40_sreg dst, int mask, +	      struct nv40_sreg s0, struct nv40_sreg s1, +	      struct nv40_sreg s2) +{ +	struct nv40_vertex_program *vp = vpc->vp; +	uint32_t *hw; + +	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); +	vpc->vpi = &vp->insns[vp->nr_insns - 1]; +	memset(vpc->vpi, 0, sizeof(*vpc->vpi)); +	vpc->vpi->const_index = -1; + +	hw = vpc->vpi->data; + +	hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); +	hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | +		  (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | +		  (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | +		  (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); + +	if (slot == 0) { +		hw[1] |= (op << 
NV40_VP_INST_VEC_OPCODE_SHIFT); +		hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; +		hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); +	} else { +		hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); +		hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); +		hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); +	} + +	emit_dst(vpc, hw, slot, dst); +	emit_src(vpc, hw, 0, s0); +	emit_src(vpc, hw, 1, s1); +	emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { +	struct nv40_sreg src; + +	switch (fsrc->SrcRegister.File) { +	case TGSI_FILE_INPUT: +		src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); +		break; +	case TGSI_FILE_CONSTANT: +		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); +		break; +	case TGSI_FILE_IMMEDIATE: +		src = vpc->imm[fsrc->SrcRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		src = vpc->r_temp[fsrc->SrcRegister.Index]; +		break; +	default: +		NOUVEAU_ERR("bad src file\n"); +		break; +	} + +	src.abs = fsrc->SrcRegisterExtMod.Absolute; +	src.negate = fsrc->SrcRegister.Negate; +	src.swz[0] = fsrc->SrcRegister.SwizzleX; +	src.swz[1] = fsrc->SrcRegister.SwizzleY; +	src.swz[2] = fsrc->SrcRegister.SwizzleZ; +	src.swz[3] = fsrc->SrcRegister.SwizzleW; +	return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { +	struct nv40_sreg dst; + +	switch (fdst->DstRegister.File) { +	case TGSI_FILE_OUTPUT: +		dst = vpc->r_result[fdst->DstRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		dst = vpc->r_temp[fdst->DstRegister.Index]; +		break; +	case TGSI_FILE_ADDRESS: +		dst = vpc->r_address[fdst->DstRegister.Index]; +		break; +	default: +		NOUVEAU_ERR("bad dst file\n"); +		break; +	} + +	return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ +	int mask = 0; + +	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; +	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; +	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; +	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; +	return mask; +} + +static boolean +src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, +	       struct nv40_sreg *src) +{ +	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); +	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; +	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, +			fsrc->SrcRegisterExtSwz.NegateY, +			fsrc->SrcRegisterExtSwz.NegateZ, +			fsrc->SrcRegisterExtSwz.NegateW }; +	uint c; + +	for (c = 0; c < 4; c++) { +		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { +		case TGSI_EXTSWIZZLE_X: +		case TGSI_EXTSWIZZLE_Y: +		case TGSI_EXTSWIZZLE_Z: +		case TGSI_EXTSWIZZLE_W: +			mask |= tgsi_mask(1 << c); +			break; +		case TGSI_EXTSWIZZLE_ZERO: +			zero_mask |= tgsi_mask(1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		case TGSI_EXTSWIZZLE_ONE: +			one_mask |= tgsi_mask(1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		default: +			assert(0); +		} + +		if (!tgsi.negate && neg[c]) +			neg_mask |= tgsi_mask(1 << c); +	} + +	if (mask == MASK_ALL && !neg_mask) +		return TRUE; + +	*src = temp(vpc); + +	if (mask) +		arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); + +	if (zero_mask) +		arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); + +	if (one_mask) +		arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); + +	if (neg_mask) { +		struct nv40_sreg one = temp(vpc); +		arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); +		arith(vpc, 0, OP_MUL, *src, 
neg_mask, *src, neg(one), none); +	} + +	return FALSE; +} + +static boolean +nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, +				const struct tgsi_full_instruction *finst) +{ +	struct nv40_sreg src[3], dst, tmp; +	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	int mask; +	int ai = -1, ci = -1, ii = -1; +	int i; + +	if (finst->Instruction.Opcode == TGSI_OPCODE_END) +		return TRUE; + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { +			src[i] = tgsi_src(vpc, fsrc); +		} +	} + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_TEMPORARY: +			if (!src_native_swz(vpc, fsrc, &src[i])) +				continue; +			break; +		default: +			break; +		} + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			if (ai == -1 || ai == fsrc->SrcRegister.Index) { +				ai = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_CONSTANT: +			if ((ci == -1 && ii == -1) || +			    ci == fsrc->SrcRegister.Index) { +				ci = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_IMMEDIATE: +			if ((ci == -1 && ii == -1) || +			    ii == fsrc->SrcRegister.Index) { +				ii = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_TEMPORARY: +			/* handled above */ +			break; +		default: +			NOUVEAU_ERR("bad src file\n"); +			return FALSE; +		} +	} + +	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]); +	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + +	switch (finst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); +		break; +	case TGSI_OPCODE_ADD: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); +		break; +	case TGSI_OPCODE_ARL: +		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_DP3: +		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DP4: +		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DPH: +		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DST: +		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_EX2: +		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_EXP: +		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_FLR: +		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FRC: +		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_LG2: +		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LIT: +		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LOG: +		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); 
+		break; +	case TGSI_OPCODE_MAD: +		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); +		break; +	case TGSI_OPCODE_MAX: +		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MIN: +		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MOV: +		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_MUL: +		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_POW: +		tmp = temp(vpc); +		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, +		      swz(src[0], X, X, X, X)); +		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		      swz(src[1], X, X, X, X), none); +		arith(vpc, 1, OP_EX2, dst, mask, none, none, +		      swz(tmp, X, X, X, X)); +		break; +	case TGSI_OPCODE_RCP: +		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_RET: +		break; +	case TGSI_OPCODE_RSQ: +		arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); +		break; +	case TGSI_OPCODE_SGE: +		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SLT: +		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SUB: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); +		break; +	case TGSI_OPCODE_XPD: +		tmp = temp(vpc); +		arith(vpc, 0, OP_MUL, tmp, mask, +		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); +		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), +		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), +		      neg(tmp)); +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); +		return FALSE; +	} + +	release_temps(vpc); +	return TRUE; +} + +static boolean +nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, +				const struct tgsi_full_declaration *fdec) +{ +	unsigned idx = fdec->DeclarationRange.First; +	int hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = NV40_VP_INST_DEST_POS; +		vpc->hpos_idx = idx; +		break; +	case TGSI_SEMANTIC_COLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV40_VP_INST_DEST_COL0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV40_VP_INST_DEST_COL1; +		} else { +			NOUVEAU_ERR("bad colour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_BCOLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV40_VP_INST_DEST_BFC0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV40_VP_INST_DEST_BFC1; +		} else { +			NOUVEAU_ERR("bad bcolour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_FOG: +		hw = NV40_VP_INST_DEST_FOGC; +		break; +	case TGSI_SEMANTIC_PSIZE: +		hw = NV40_VP_INST_DEST_PSZ; +		break; +	case TGSI_SEMANTIC_GENERIC: +		if (fdec->Semantic.SemanticIndex <= 7) { +			hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); +		} else { +			NOUVEAU_ERR("bad generic semantic index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad output semantic\n"); +		return FALSE; +	} + +	vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); +	return TRUE; +} + +static boolean +nv40_vertprog_prepare(struct nv40_vpc *vpc) +{ +	struct tgsi_parse_context p; +	int high_temp = -1, high_addr = -1, nr_imm = 0, i; + +	tgsi_parse_init(&p, vpc->vp->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch(tok->Token.Type) { +		case TGSI_TOKEN_TYPE_IMMEDIATE: +			nr_imm++; +			break; +		case 
TGSI_TOKEN_TYPE_DECLARATION:
+		{
+			const struct tgsi_full_declaration *fdec;
+
+			fdec = &p.FullToken.FullDeclaration;
+			switch (fdec->Declaration.File) {
+			case TGSI_FILE_TEMPORARY:
+				if (fdec->DeclarationRange.Last > high_temp) {
+					high_temp =
+						fdec->DeclarationRange.Last;
+				}
+				break;
+#if 0 /* this would be nice.. except gallium doesn't track it */
+			case TGSI_FILE_ADDRESS:
+				if (fdec->DeclarationRange.Last > high_addr) {
+					high_addr =
+						fdec->DeclarationRange.Last;
+				}
+				break;
+#endif
+			case TGSI_FILE_OUTPUT:
+				if (!nv40_vertprog_parse_decl_output(vpc, fdec))
+					return FALSE;
+				break;
+			default:
+				break;
+			}
+		}
+			break;
+#if 1 /* yay, parse instructions looking for address regs instead */
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+			const struct tgsi_full_dst_register *fdst;
+
+			finst = &p.FullToken.FullInstruction;
+			fdst = &finst->FullDstRegisters[0];
+
+			if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
+				if (fdst->DstRegister.Index > high_addr)
+					high_addr = fdst->DstRegister.Index;
+			}
+		
+		}
+			break;
+#endif
+		default:
+			break;
+		}
+	}
+	tgsi_parse_free(&p);
+
+	if (nr_imm) {
+		vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg));
+		assert(vpc->imm);
+	}
+
+	if (++high_temp) {
+		vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg));
+		for (i = 0; i < high_temp; i++)
+			vpc->r_temp[i] = temp(vpc);
+	}
+
+	if (++high_addr) {
+		vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg));
+		for (i = 0; i < high_addr; i++)
+			vpc->r_address[i] = temp(vpc);
+	}
+
+	vpc->r_temps_discard = 0;
+	return TRUE;
+}
+
+static void
+nv40_vertprog_translate(struct nv40_context *nv40,
+			struct nv40_vertex_program *vp)
+{
+	struct tgsi_parse_context parse;
+	struct nv40_vpc *vpc = NULL;
+	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
+	int i;
+
+	vpc = CALLOC(1, sizeof(struct nv40_vpc));
+	if (!vpc)
+		return;
+	vpc->vp = vp;
+
+	if (!nv40_vertprog_prepare(vpc)) {
+		FREE(vpc);
+		return;
+	}
+
+	/* Redirect post-transform vertex position to a temp if user clip
+	 * planes are enabled.  We need to append code to the vtxprog
+	 * to handle clip planes later.
+	 */ +	if (vp->ucp.nr)  { +		vpc->r_result[vpc->hpos_idx] = temp(vpc); +		vpc->r_temps_discard = 0; +	} + +	tgsi_parse_init(&parse, vp->pipe.tokens); + +	while (!tgsi_parse_end_of_tokens(&parse)) { +		tgsi_parse_token(&parse); + +		switch (parse.FullToken.Token.Type) { +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			const struct tgsi_full_immediate *imm; + +			imm = &parse.FullToken.FullImmediate; +			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +			assert(imm->Immediate.NrTokens == 4 + 1); +			vpc->imm[vpc->nr_imm++] = +				constant(vpc, -1, +					 imm->u.ImmediateFloat32[0].Float, +					 imm->u.ImmediateFloat32[1].Float, +					 imm->u.ImmediateFloat32[2].Float, +					 imm->u.ImmediateFloat32[3].Float); +		} +			break; +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; +			finst = &parse.FullToken.FullInstruction; +			if (!nv40_vertprog_parse_instruction(vpc, finst)) +				goto out_err; +		} +			break; +		default: +			break; +		} +	} + +	/* Write out HPOS if it was redirected to a temp earlier */ +	if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { +		struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, +						NV40_VP_INST_DEST_POS); +		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + +		arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); +	} + +	/* Insert code to handle user clip planes */ +	for (i = 0; i < vp->ucp.nr; i++) { +		struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, +						NV40_VP_INST_DEST_CLIP(i)); +		struct nv40_sreg ceqn = constant(vpc, -1, +						 nv40->clip.ucp[i][0], +						 nv40->clip.ucp[i][1], +						 nv40->clip.ucp[i][2], +						 nv40->clip.ucp[i][3]); +		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; +		unsigned mask; + +		switch (i) { +		case 0: case 3: mask = MASK_Y; break; +		case 1: case 4: mask = MASK_Z; break; +		case 2: case 5: mask = MASK_W; break; +		default: +			NOUVEAU_ERR("invalid clip dist #%d\n", i); +			goto out_err; +		} + +		arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); +	} + +	vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; +	vp->translated = TRUE; +out_err: +	tgsi_parse_free(&parse); +	if (vpc->r_temp) +		FREE(vpc->r_temp);  +	if (vpc->r_address) +		FREE(vpc->r_address);  +	if (vpc->imm)	 +		FREE(vpc->imm);  +	FREE(vpc); +} + +static boolean +nv40_vertprog_validate(struct nv40_context *nv40) +{  +	struct nouveau_winsys *nvws = nv40->nvws; +	struct pipe_winsys *ws = nv40->pipe.winsys; +	struct nouveau_grobj *curie = nv40->screen->curie; +	struct nv40_vertex_program *vp; +	struct pipe_buffer *constbuf; +	boolean upload_code = FALSE, upload_data = FALSE; +	int i; + +	if (nv40->render_mode == HW) { +		vp = nv40->vertprog; +		constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + +		if ((nv40->dirty & NV40_NEW_UCP) || +		    memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { +			nv40_vertprog_destroy(nv40, vp); +			memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); +		} +	} else { +		vp = nv40->swtnl.vertprog; +		constbuf = NULL; +	} + +	/* Translate TGSI shader into hw bytecode */ +	if (vp->translated) +		goto check_gpu_resources; + +	nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; +	nv40_vertprog_translate(nv40, vp); +	if (!vp->translated) { +		nv40->fallback_swtnl |= NV40_NEW_VERTPROG; +		return FALSE; +	} + +check_gpu_resources: +	/* Allocate hw vtxprog exec slots */ +	if (!vp->exec) { +		struct nouveau_resource *heap = nv40->screen->vp_exec_heap; +		struct nouveau_stateobj *so; +		uint vplen = vp->nr_insns; + +		if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { +			while (heap->next 
&& heap->size < vplen) { +				struct nv40_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->exec); +			} + +			if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) +				assert(0); +		} + +		so = so_new(7, 0); +		so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); +		so_data  (so, vp->exec->start); +		so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); +		so_data  (so, vp->ir); +		so_data  (so, vp->or); +		so_method(so, curie,  NV40TCL_CLIP_PLANE_ENABLE, 1); +		so_data  (so, vp->clip_ctrl); +		so_ref(so, &vp->so); + +		upload_code = TRUE; +	} + +	/* Allocate hw vtxprog const slots */ +	if (vp->nr_consts && !vp->data) { +		struct nouveau_resource *heap = nv40->screen->vp_data_heap; + +		if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { +			while (heap->next && heap->size < vp->nr_consts) { +				struct nv40_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->data); +			} + +			if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) +				assert(0); +		} + +		/*XXX: handle this some day */ +		assert(vp->data->start >= vp->data_start_min); + +		upload_data = TRUE; +		if (vp->data_start != vp->data->start) +			upload_code = TRUE; +	} + +	/* If exec or data segments moved we need to patch the program to +	 * fixup offsets and register IDs. +	 */ +	if (vp->exec_start != vp->exec->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->has_branch_offset) { +				assert(0); +			} +		} + +		vp->exec_start = vp->exec->start; +	} + +	if (vp->nr_consts && vp->data_start != vp->data->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->const_index >= 0) { +				vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; +				vpi->data[1] |= +					(vpi->const_index + vp->data->start) << +					NV40_VP_INST_CONST_SRC_SHIFT; + +			} +		} + +		vp->data_start = vp->data->start; +	} + +	/* Update + Upload constant values */ +	if (vp->nr_consts) { +		float *map = NULL; + +		if (constbuf) { +			map = ws->buffer_map(ws, constbuf, +					     PIPE_BUFFER_USAGE_CPU_READ); +		} + +		for (i = 0; i < vp->nr_consts; i++) { +			struct nv40_vertex_program_data *vpd = &vp->consts[i]; + +			if (vpd->index >= 0) { +				if (!upload_data && +				    !memcmp(vpd->value, &map[vpd->index * 4], +					    4 * sizeof(float))) +					continue; +				memcpy(vpd->value, &map[vpd->index * 4], +				       4 * sizeof(float)); +			} + +			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); +			OUT_RING  (i + vp->data->start); +			OUT_RINGp ((uint32_t *)vpd->value, 4); +		} + +		if (constbuf) +			ws->buffer_unmap(ws, constbuf); +	} + +	/* Upload vtxprog */ +	if (upload_code) { +#if 0 +		for (i = 0; i < vp->nr_insns; i++) { +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); +		} +#endif +		BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); +		OUT_RING  (vp->exec->start); +		for (i = 0; i < vp->nr_insns; i++) { +			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); +			OUT_RINGp (vp->insns[i].data, 4); +		} +	} + +	if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { +		so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); +		return TRUE; +	} + +	return FALSE; +} + +void +nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ +	struct 
nouveau_winsys *nvws = nv40->screen->nvws; + +	vp->translated = FALSE; + +	if (vp->nr_insns) { +		FREE(vp->insns); +		vp->insns = NULL; +		vp->nr_insns = 0; +	} + +	if (vp->nr_consts) { +		FREE(vp->consts); +		vp->consts = NULL; +		vp->nr_consts = 0; +	} + +	nvws->res_free(&vp->exec); +	vp->exec_start = 0; +	nvws->res_free(&vp->data); +	vp->data_start = 0; +	vp->data_start_min = 0; + +	vp->ir = vp->or = vp->clip_ctrl = 0; +	so_ref(NULL, &vp->so); +} + +struct nv40_state_entry nv40_state_vertprog = { +	.validate = nv40_vertprog_validate, +	.dirty = { +		.pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, +		.hw = NV40_STATE_VERTPROG, +	} +}; + diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile new file mode 100644 index 0000000000..be30400c03 --- /dev/null +++ b/src/gallium/drivers/nv50/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv50 + +DRIVER_SOURCES = \ +	nv50_clear.c \ +	nv50_context.c \ +	nv50_draw.c \ +	nv50_miptree.c \ +	nv50_query.c \ +	nv50_program.c \ +	nv50_screen.c \ +	nv50_state.c \ +	nv50_state_validate.c \ +	nv50_surface.c \ +	nv50_tex.c \ +	nv50_vbo.c + +C_SOURCES = \ +	$(COMMON_SOURCES) \ +	$(DRIVER_SOURCES) + +ASM_SOURCES =  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c new file mode 100644 index 0000000000..f9bc3b53ca --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -0,0 +1,92 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +void +nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer; +	struct pipe_scissor_state sc, s_sc = nv50->scissor; +	unsigned dirty = nv50->dirty; + +	nv50->dirty = 0; + +	if (ps->format == PIPE_FORMAT_Z24S8_UNORM || +	    ps->format == PIPE_FORMAT_Z16_UNORM) { +		fb.nr_cbufs = 0; +		fb.zsbuf = ps; +	} else { +		fb.nr_cbufs = 1; +		fb.cbufs[0] = ps; +		fb.zsbuf = NULL; +	} +	fb.width = ps->width; +	fb.height = ps->height; +	pipe->set_framebuffer_state(pipe, &fb); + +	sc.minx = sc.miny = 0; +	sc.maxx = fb.width; +	sc.maxy = fb.height; +	pipe->set_scissor_state(pipe, &sc); + +	nv50_state_validate(nv50); + +	switch (ps->format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +		BEGIN_RING(chan, tesla, 0x0d80, 4); +		OUT_RINGf (chan, ubyte_to_float((clearValue >> 16) & 0xff)); +		OUT_RINGf (chan, ubyte_to_float((clearValue >>  8) & 0xff)); +		OUT_RINGf (chan, ubyte_to_float((clearValue >>  0) & 0xff)); +		OUT_RINGf (chan, ubyte_to_float((clearValue >> 24) & 0xff)); +		BEGIN_RING(chan, tesla, 0x19d0, 1); +		OUT_RING  (chan, 0x3c); +		break; +	case PIPE_FORMAT_Z24S8_UNORM: +		BEGIN_RING(chan, tesla, 0x0d90, 1); +		OUT_RINGf (chan, (float)(clearValue >> 8) * (1.0 / 16777215.0)); +		BEGIN_RING(chan, tesla, 0x0da0, 1); +		OUT_RING  (chan, clearValue & 0xff); +		BEGIN_RING(chan, tesla, 0x19d0, 1); +		OUT_RING  (chan, 0x03); +		break; +	default: +		pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, +				   clearValue); +		break; +	} + +	pipe->set_framebuffer_state(pipe, &s_fb); +	pipe->set_scissor_state(pipe, &s_sc); +	nv50->dirty |= dirty; + +	ps->status = PIPE_SURFACE_STATUS_CLEAR; +} + diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c new file mode 100644 index 0000000000..565a5da668 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -0,0 +1,90 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +static void +nv50_flush(struct pipe_context *pipe, unsigned flags, +	   struct pipe_fence_handle **fence) +{ +	struct nv50_context *nv50 = (struct nv50_context *)pipe; +	 +	FIRE_RING(nv50->screen->nvws->channel); +} + +static void +nv50_destroy(struct pipe_context *pipe) +{ +	struct nv50_context *nv50 = (struct nv50_context *)pipe; + +	draw_destroy(nv50->draw); +	FREE(nv50); +} + + +static void +nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ +	struct pipe_winsys *pipe_winsys = pscreen->winsys; +	struct nv50_screen *screen = nv50_screen(pscreen); +	struct nv50_context *nv50; + +	nv50 = CALLOC_STRUCT(nv50_context); +	if (!nv50) +		return NULL; +	nv50->screen = screen; +	nv50->pctx_id = pctx_id; + +	nv50->pipe.winsys = pipe_winsys; +	nv50->pipe.screen = pscreen; + +	nv50->pipe.destroy = nv50_destroy; + +	nv50->pipe.set_edgeflags = nv50_set_edgeflags; +	nv50->pipe.draw_arrays = nv50_draw_arrays; +	nv50->pipe.draw_elements = nv50_draw_elements; +	nv50->pipe.clear = nv50_clear; + +	nv50->pipe.flush = nv50_flush; + +	nv50_init_surface_functions(nv50); +	nv50_init_state_functions(nv50); +	nv50_init_query_functions(nv50); + +	nv50->draw = draw_create(); +	assert(nv50->draw); +	draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + +	return &nv50->pipe; +} + +		 diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h new file mode 100644 index 0000000000..1e9d45cb34 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -0,0 +1,207 @@ +#ifndef __NV50_CONTEXT_H__ +#define __NV50_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv50_screen.h" +#include "nv50_program.h" + +#define NOUVEAU_ERR(fmt, args...) \ +	fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) 
\ +	fprintf(stderr, "nouveau: "fmt, ##args); + +/* Constant buffer assignment */ +#define NV50_CB_PMISC		0 +#define NV50_CB_PVP		1 +#define NV50_CB_PFP		2 +#define NV50_CB_PGP		3 +#define NV50_CB_TIC		4 +#define NV50_CB_TSC		5 +#define NV50_CB_PUPLOAD         6 + +#define NV50_NEW_BLEND		(1 << 0) +#define NV50_NEW_ZSA		(1 << 1) +#define NV50_NEW_BLEND_COLOUR	(1 << 2) +#define NV50_NEW_STIPPLE	(1 << 3) +#define NV50_NEW_SCISSOR	(1 << 4) +#define NV50_NEW_VIEWPORT	(1 << 5) +#define NV50_NEW_RASTERIZER	(1 << 6) +#define NV50_NEW_FRAMEBUFFER	(1 << 7) +#define NV50_NEW_VERTPROG	(1 << 8) +#define NV50_NEW_VERTPROG_CB	(1 << 9) +#define NV50_NEW_FRAGPROG	(1 << 10) +#define NV50_NEW_FRAGPROG_CB	(1 << 11) +#define NV50_NEW_ARRAYS		(1 << 12) +#define NV50_NEW_SAMPLER	(1 << 13) +#define NV50_NEW_TEXTURE	(1 << 14) + +struct nv50_blend_stateobj { +	struct pipe_blend_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv50_zsa_stateobj { +	struct pipe_depth_stencil_alpha_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv50_rasterizer_stateobj { +	struct pipe_rasterizer_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv50_miptree_level { +	struct pipe_buffer **image; +	int *image_offset; +	unsigned image_dirty_cpu[512/32]; +	unsigned image_dirty_gpu[512/32]; +}; + +struct nv50_miptree { +	struct pipe_texture base; +	struct pipe_buffer *buffer; + +	struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS]; +	int image_nr; +	int total_size; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_texture *pt) +{ +	return (struct nv50_miptree *)pt; +} + +struct nv50_surface { +	struct pipe_surface base; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *pt) +{ +	return (struct nv50_surface *)pt; +} + +static INLINE struct pipe_buffer * +nv50_surface_buffer(struct pipe_surface *surface) +{ +	struct nv50_miptree *mt = (struct nv50_miptree *)surface->texture; +	return mt->buffer; +} + +struct nv50_state { +	unsigned dirty; + +	struct nouveau_stateobj *fb; +	struct nouveau_stateobj *blend; +	struct nouveau_stateobj *blend_colour; +	struct nouveau_stateobj *zsa; +	struct nouveau_stateobj *rast; +	struct nouveau_stateobj *stipple; +	struct nouveau_stateobj *scissor; +	unsigned scissor_enabled; +	struct nouveau_stateobj *viewport; +	unsigned viewport_bypass; +	struct nouveau_stateobj *tsc_upload; +	struct nouveau_stateobj *tic_upload; +	struct nouveau_stateobj *vertprog; +	struct nouveau_stateobj *fragprog; +	struct nouveau_stateobj *vtxfmt; +	struct nouveau_stateobj *vtxbuf; +}; + +struct nv50_context { +	struct pipe_context pipe; + +	struct nv50_screen *screen; +	unsigned pctx_id; + +	struct draw_context *draw; + +	struct nv50_state state; + +	unsigned dirty; +	struct nv50_blend_stateobj *blend; +	struct nv50_zsa_stateobj *zsa; +	struct nv50_rasterizer_stateobj *rasterizer; +	struct pipe_blend_color blend_colour; +	struct pipe_poly_stipple stipple; +	struct pipe_scissor_state scissor; +	struct pipe_viewport_state viewport; +	struct pipe_framebuffer_state framebuffer; +	struct nv50_program *vertprog; +	struct nv50_program *fragprog; +	struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; +	struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; +	unsigned vtxbuf_nr; +	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +	unsigned vtxelt_nr; +	unsigned *sampler[PIPE_MAX_SAMPLERS]; +	unsigned sampler_nr; +	struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; +	unsigned miptree_nr; +}; + +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) +{ +	
return (struct nv50_context *)pipe; +} + +extern void nv50_init_surface_functions(struct nv50_context *nv50); +extern void nv50_init_state_functions(struct nv50_context *nv50); +extern void nv50_init_query_functions(struct nv50_context *nv50); + +extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); + +extern int +nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, +		     int dx, int dy, struct pipe_surface *src, int sx, int sy, +		     int w, int h); + +/* nv50_draw.c */ +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); + +/* nv50_vbo.c */ +extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, +				unsigned start, unsigned count); +extern boolean nv50_draw_elements(struct pipe_context *pipe, +				  struct pipe_buffer *indexBuffer, +				  unsigned indexSize, +				  unsigned mode, unsigned start, +				  unsigned count); +extern void nv50_vbo_validate(struct nv50_context *nv50); + +/* nv50_clear.c */ +extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		       unsigned clearValue); + +/* nv50_program.c */ +extern void nv50_vertprog_validate(struct nv50_context *nv50); +extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); + +/* nv50_state_validate.c */ +extern boolean nv50_state_validate(struct nv50_context *nv50); + +/* nv50_tex.c */ +extern void nv50_tex_validate(struct nv50_context *); + +/* nv50_miptree.c */ +extern void nv50_miptree_sync(struct pipe_screen *, struct nv50_miptree *, +			      unsigned level, unsigned image); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c new file mode 100644 index 0000000000..2f6f607261 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -0,0 +1,89 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "draw/draw_pipe.h" + +#include "nv50_context.h" + +struct nv50_render_stage { +	struct draw_stage stage; +	struct nv50_context *nv50; +}; + +static INLINE struct nv50_render_stage * +nv50_render_stage(struct draw_stage *stage) +{ +	return (struct nv50_render_stage *)stage; +} + +static void +nv50_render_point(struct draw_stage *stage, struct prim_header *prim) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv50_render_line(struct draw_stage *stage, struct prim_header *prim) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv50_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv50_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nv50_render_reset_stipple_counter(struct draw_stage *stage) +{ +	NOUVEAU_ERR("\n"); +} + +static void +nv50_render_destroy(struct draw_stage *stage) +{ +	FREE(stage); +} + +struct draw_stage * +nv50_draw_render_stage(struct nv50_context *nv50) +{ +	struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); + +	rs->nv50 = nv50; +	rs->stage.draw = nv50->draw; +	rs->stage.destroy = nv50_render_destroy; +	rs->stage.point = nv50_render_point; +	rs->stage.line = nv50_render_line; +	rs->stage.tri = nv50_render_tri; +	rs->stage.flush = nv50_render_flush; +	rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; + +	return &rs->stage; +} + diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c new file mode 100644 index 0000000000..91091d53f5 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -0,0 +1,320 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + +static struct pipe_texture * +nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) +{ +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); +	struct pipe_texture *pt = &mt->base; +	unsigned usage, width = tmp->width[0], height = tmp->height[0]; +	unsigned depth = tmp->depth[0]; +	int i, l; + +	mt->base = *tmp; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; + +	usage = PIPE_BUFFER_USAGE_PIXEL; +	switch (pt->format) { +	case PIPE_FORMAT_Z24S8_UNORM: +	case PIPE_FORMAT_Z16_UNORM: +		usage |= NOUVEAU_BUFFER_USAGE_ZETA; +		break; +	default: +		break; +	} + +	switch (pt->target) { +	case PIPE_TEXTURE_3D: +		mt->image_nr = pt->depth[0]; +		break; +	case PIPE_TEXTURE_CUBE: +		mt->image_nr = 6; +		break; +	default: +		mt->image_nr = 1; +		break; +	} + +	for (l = 0; l <= pt->last_level; l++) { +		struct nv50_miptree_level *lvl = &mt->level[l]; + +		pt->width[l] = width; +		pt->height[l] = height; +		pt->depth[l] = depth; +		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); +		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + +		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); +		lvl->image = CALLOC(mt->image_nr, sizeof(struct pipe_buffer *)); + +		width = MAX2(1, width >> 1); +		height = MAX2(1, height >> 1); +		depth = MAX2(1, depth >> 1); +	} + +	for (i = 0; i < mt->image_nr; i++) { +		for (l = 0; l <= pt->last_level; l++) { +			struct nv50_miptree_level *lvl = &mt->level[l]; +			int size; + +			size  = align(pt->width[l], 8) * pt->block.size; +			size  = align(size, 64); +			size *= align(pt->height[l], 8) * pt->block.size; + +			lvl->image[i] = ws->buffer_create(ws, 256, 0, size); +			lvl->image_offset[i] = mt->total_size; + +			mt->total_size += size; +		} +	} + +	mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size); +	if (!mt->buffer) { +		FREE(mt); +		return NULL; +	} + +	return &mt->base; +} + +static struct pipe_texture * +nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +		     const unsigned *stride, struct pipe_buffer *pb) +{ +	struct nv50_miptree *mt; + +	/* Only supports 2D, non-mipmapped textures for the moment */ +	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || +	    pt->depth[0] != 1) +		return NULL; + +	mt = CALLOC_STRUCT(nv50_miptree); +	if (!mt) +		return NULL; + +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->image_nr = 1; +	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + +	pipe_buffer_reference(pscreen, &mt->buffer, pb); +	return &mt->base; +} + +static INLINE void +mark_dirty(uint32_t *flags, unsigned image) +{ +	flags[image / 32] |= (1 << (image % 32)); +} + +static INLINE void +mark_clean(uint32_t *flags, unsigned image) +{ +	flags[image / 32] &= ~(1 << (image % 32)); +} + +static INLINE int +is_dirty(uint32_t *flags, unsigned image) +{ +	return !!(flags[image / 32] & (1 << (image % 32))); +} + +static void +nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ +	struct pipe_texture *pt = *ppt; + +	*ppt = NULL; + +	if (--pt->refcount <= 0) { +		struct nv50_miptree *mt = nv50_miptree(pt); + +		pipe_buffer_reference(pscreen, &mt->buffer, NULL); +		FREE(mt); +	} +} + +void +nv50_miptree_sync(struct pipe_screen *pscreen, struct nv50_miptree *mt, +		  unsigned level, unsigned image) +{ +	struct nv50_screen *nvscreen = nv50_screen(pscreen); +	
struct nv50_miptree_level *lvl = &mt->level[level]; +	struct pipe_surface *dst, *src; +	unsigned face = 0, zslice = 0; + +	if (!is_dirty(lvl->image_dirty_cpu, image)) +		return; + +	if (mt->base.target == PIPE_TEXTURE_CUBE) +		face = image; +	else +	if (mt->base.target == PIPE_TEXTURE_3D) +		zslice = image; + +	/* Mark as clean already - so we don't continually call this function +	 * trying to get a GPU_WRITE pipe_surface! +	 */ +	mark_clean(lvl->image_dirty_cpu, image); + +	/* Pretend we're doing CPU access so we get the backing pipe_surface +	 * and not a view into the larger miptree. +	 */ +	src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, +				       PIPE_BUFFER_USAGE_CPU_READ); + +	/* Pretend we're only reading with the GPU so surface doesn't get marked +	 * as dirtied by the GPU. +	 */ +	dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, +				       PIPE_BUFFER_USAGE_GPU_READ); + +	nv50_surface_do_copy(nvscreen, dst, 0, 0, src, 0, 0, dst->width, dst->height); + +	pscreen->tex_surface_release(pscreen, &dst); +	pscreen->tex_surface_release(pscreen, &src); +} + +/* The reverse of the above */ +static void +nv50_miptree_sync_cpu(struct pipe_screen *pscreen, struct nv50_miptree *mt, +		      unsigned level, unsigned image) +{ +	struct nv50_screen *nvscreen = nv50_screen(pscreen); +	struct nv50_miptree_level *lvl = &mt->level[level]; +	struct pipe_surface *dst, *src; +	unsigned face = 0, zslice = 0; + +	if (!is_dirty(lvl->image_dirty_gpu, image)) +		return; + +	if (mt->base.target == PIPE_TEXTURE_CUBE) +		face = image; +	else +	if (mt->base.target == PIPE_TEXTURE_3D) +		zslice = image; + +	mark_clean(lvl->image_dirty_gpu, image); + +	src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, +				       PIPE_BUFFER_USAGE_GPU_READ); +	dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, +				       PIPE_BUFFER_USAGE_CPU_READ); + +	nv50_surface_do_copy(nvscreen, dst, 0, 0, src, 0, 0, dst->width, dst->height); + +	pscreen->tex_surface_release(pscreen, &dst); +	pscreen->tex_surface_release(pscreen, &src); +} + +static struct pipe_surface * +nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +			 unsigned face, unsigned level, unsigned zslice, +			 unsigned flags) +{ +	struct nv50_miptree *mt = nv50_miptree(pt); +	struct nv50_miptree_level *lvl = &mt->level[level]; +	struct pipe_surface *ps; +	int img; + +	if (pt->target == PIPE_TEXTURE_CUBE) +		img = face; +	else +	if (pt->target == PIPE_TEXTURE_3D) +		img = zslice; +	else +		img = 0; + +	ps = CALLOC_STRUCT(pipe_surface); +	if (!ps) +		return NULL; +	pipe_texture_reference(&ps->texture, pt); +	ps->format = pt->format; +	ps->width = pt->width[level]; +	ps->height = pt->height[level]; +	ps->block = pt->block; +	ps->nblocksx = pt->nblocksx[level]; +	ps->nblocksy = pt->nblocksy[level]; +	ps->stride = ps->width * ps->block.size; +	ps->usage = flags; +	ps->status = PIPE_SURFACE_STATUS_DEFINED; +	ps->refcount = 1; +	ps->face = face; +	ps->level = level; +	ps->zslice = zslice; + +	if (flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { +		assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); +		nv50_miptree_sync_cpu(pscreen, mt, level, img); + +		ps->offset = 0; +		pipe_texture_reference(&ps->texture, pt); + +		if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) +			mark_dirty(lvl->image_dirty_cpu, img); +	} else { +		nv50_miptree_sync(pscreen, mt, level, img); + +		ps->offset = lvl->image_offset[img]; +		pipe_texture_reference(&ps->texture, pt); + +		if (flags & 
PIPE_BUFFER_USAGE_GPU_WRITE) +			mark_dirty(lvl->image_dirty_gpu, img); +	} + +	return ps; +} + +static void +nv50_miptree_surface_del(struct pipe_screen *pscreen, +			 struct pipe_surface **psurface) +{ +	struct pipe_surface *ps = *psurface; +	struct nv50_surface *s = nv50_surface(ps); + +	*psurface = NULL; + +	if (--ps->refcount <= 0) { +		pipe_texture_reference(&ps->texture, NULL); +		FREE(s); +	} +} + +void +nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ +	pscreen->texture_create = nv50_miptree_create; +	pscreen->texture_blanket = nv50_miptree_blanket; +	pscreen->texture_release = nv50_miptree_release; +	pscreen->get_tex_surface = nv50_miptree_surface_new; +	pscreen->tex_surface_release = nv50_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c new file mode 100644 index 0000000000..14c5d47e79 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -0,0 +1,1784 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv50_context.h" + +#define NV50_SU_MAX_TEMP 64 +//#define NV50_PROGRAM_DUMP + +/* ARL - gallium craps itself on progs/vp/arl.txt + * + * MSB - Like MAD, but MUL+SUB + * 	- Fuck it off, introduce a way to negate args for ops that + * 	  support it. + * + * Look into inlining IMMD for ops other than MOV (make it general?) + * 	- Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, + * 	  but can emit to P_TEMP first - then MOV later. NVIDIA does this + * + * In ops such as ADD it's possible to construct a bad opcode in the !is_long() + * case, if the emit_src() causes the inst to suddenly become long. + * + * Verify half-insns work where expected - and force disable them where they + * don't work - MUL has it forcibly disabled atm as it fixes POW.. + * + * FUCK! watch dst==src vectors, can overwrite components that are needed. + * 	ie. SUB R0, R0.yzxw, R0 + * + * Things to check with renouveau: + * 	FP attr/result assignment - how? 
+ * 		attrib + * 			- 0x16bc maps vp output onto fp hpos + * 			- 0x16c0 maps vp output onto fp col0 + * 		result + * 			- colr always 0-3 + * 			- depr always 4 + * 0x16bc->0x16e8 --> some binding between vp/fp regs + * 0x16b8 --> VP output count + * + * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 + * 	      "MOV rcol.x, fcol.y" = 0x00000004 + * 0x19a8 --> as above but 0x00000100 and 0x00000000 + * 	- 0x00100000 used when KIL used + * 0x196c --> as above but 0x00000011 and 0x00000000 + * + * 0x1988 --> 0xXXNNNNNN + * 	- XX == FP high something + */ +struct nv50_reg { +	enum { +		P_TEMP, +		P_ATTR, +		P_RESULT, +		P_CONST, +		P_IMMD +	} type; +	int index; + +	int hw; +	int neg; +}; + +struct nv50_pc { +	struct nv50_program *p; + +	/* hw resources */ +	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + +	/* tgsi resources */ +	struct nv50_reg *temp; +	int temp_nr; +	struct nv50_reg *attr; +	int attr_nr; +	struct nv50_reg *result; +	int result_nr; +	struct nv50_reg *param; +	int param_nr; +	struct nv50_reg *immd; +	float *immd_buf; +	int immd_nr; + +	struct nv50_reg *temp_temp[16]; +	unsigned temp_temp_nr; +}; + +static void +alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) +{ +	int i; + +	if (reg->type == P_RESULT) { +		if (pc->p->cfg.high_result < (reg->hw + 1)) +			pc->p->cfg.high_result = reg->hw + 1; +	} + +	if (reg->type != P_TEMP) +		return; + +	if (reg->hw >= 0) { +		/*XXX: do this here too to catch FP temp-as-attr usage.. +		 *     not clean, but works */ +		if (pc->p->cfg.high_temp < (reg->hw + 1)) +			pc->p->cfg.high_temp = reg->hw + 1; +		return; +	} + +	for (i = 0; i < NV50_SU_MAX_TEMP; i++) { +		if (!(pc->r_temp[i])) { +			pc->r_temp[i] = reg; +			reg->hw = i; +			if (pc->p->cfg.high_temp < (i + 1)) +				pc->p->cfg.high_temp = i + 1; +			return; +		} +	} + +	assert(0); +} + +static struct nv50_reg * +alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) +{ +	struct nv50_reg *r; +	int i; + +	if (dst && dst->type == P_TEMP && dst->hw == -1) +		return dst; + +	for (i = 0; i < NV50_SU_MAX_TEMP; i++) { +		if (!pc->r_temp[i]) { +			r = CALLOC_STRUCT(nv50_reg); +			r->type = P_TEMP; +			r->index = -1; +			r->hw = i; +			pc->r_temp[i] = r; +			return r; +		} +	} + +	assert(0); +	return NULL; +} + +static void +free_temp(struct nv50_pc *pc, struct nv50_reg *r) +{ +	if (r->index == -1) { +		unsigned hw = r->hw; + +		FREE(pc->r_temp[hw]); +		pc->r_temp[hw] = NULL; +	} +} + +static int +alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) +{ +	int i; + +	if ((idx + 4) >= NV50_SU_MAX_TEMP) +		return 1; + +	if (pc->r_temp[idx] || pc->r_temp[idx + 1] || +	    pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) +		return alloc_temp4(pc, dst, idx + 1); + +	for (i = 0; i < 4; i++) { +		dst[i] = CALLOC_STRUCT(nv50_reg); +		dst[i]->type = P_TEMP; +		dst[i]->index = -1; +		dst[i]->hw = idx + i; +		pc->r_temp[idx + i] = dst[i]; +	} + +	return 0; +} + +static void +free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) +{ +	int i; + +	for (i = 0; i < 4; i++) +		free_temp(pc, reg[i]); +} + +static struct nv50_reg * +temp_temp(struct nv50_pc *pc) +{ +	if (pc->temp_temp_nr >= 16) +		assert(0); + +	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); +	return pc->temp_temp[pc->temp_temp_nr++]; +} + +static void +kill_temp_temp(struct nv50_pc *pc) +{ +	int i; +	 +	for (i = 0; i < pc->temp_temp_nr; i++) +		free_temp(pc, pc->temp_temp[i]); +	pc->temp_temp_nr = 0; +} + +static int +ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) +{ +	pc->immd_buf = 
REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
+			       (pc->immd_nr + 1) * 4 * sizeof(float));
+	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
+	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
+	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
+	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
+	
+	return pc->immd_nr++;
+}
+
+static struct nv50_reg *
+alloc_immd(struct nv50_pc *pc, float f)
+{
+	struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
+	unsigned hw;
+
+	hw = ctor_immd(pc, f, 0, 0, 0) * 4;
+	r->type = P_IMMD;
+	r->hw = hw;
+	r->index = -1;
+	return r;
+}
+
+static struct nv50_program_exec *
+exec(struct nv50_pc *pc)
+{
+	struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
+
+	e->param.index = -1;
+	return e;
+}
+
+static void
+emit(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+	struct nv50_program *p = pc->p;
+
+	if (p->exec_tail)
+		p->exec_tail->next = e;
+	if (!p->exec_head)
+		p->exec_head = e;
+	p->exec_tail = e;
+	p->exec_size += (e->inst[0] & 1) ? 2 : 1;
+}
+
+static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
+
+static boolean
+is_long(struct nv50_program_exec *e)
+{
+	if (e->inst[0] & 1)
+		return TRUE;
+	return FALSE;
+}
+
+static boolean
+is_immd(struct nv50_program_exec *e)
+{
+	if (is_long(e) && (e->inst[1] & 3) == 3)
+		return TRUE;
+	return FALSE;
+}
+
+static INLINE void
+set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
+	 struct nv50_program_exec *e)
+{
+	set_long(pc, e);
+	e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
+	e->inst[1] |= (pred << 7) | (idx << 12);
+}
+
+static INLINE void
+set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
+	    struct nv50_program_exec *e)
+{
+	set_long(pc, e);
+	e->inst[1] &= ~((0x3 << 4) | (1 << 6));
+	e->inst[1] |= (idx << 4) | (on << 6);
+}
+
+static INLINE void
+set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+	if (is_long(e))
+		return;
+
+	e->inst[0] |= 1;
+	set_pred(pc, 0xf, 0, e);
+	set_pred_wr(pc, 0, 0, e);
+}
+
+static INLINE void
+set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
+{
+	if (dst->type == P_RESULT) {
+		set_long(pc, e);
+		e->inst[1] |= 0x00000008;
+	}
+
+	alloc_reg(pc, dst);
+	e->inst[0] |= (dst->hw << 2);
+}
+
+static INLINE void
+set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
+{
+	unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */
+
+	set_long(pc, e);
+	/*XXX: can't be predicated - bits overlap.. catch cases where both
+	 *     are required and avoid them. 
*/ +	set_pred(pc, 0, 0, e); +	set_pred_wr(pc, 0, 0, e); + +	e->inst[1] |= 0x00000002 | 0x00000001; +	e->inst[0] |= (val & 0x3f) << 16; +	e->inst[1] |= (val >> 6) << 2; +} + +static void +emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, +	    struct nv50_reg *src, struct nv50_reg *iv) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0x80000000; +	set_dst(pc, dst, e); +	alloc_reg(pc, src); +	e->inst[0] |= (src->hw << 16); +	if (iv) { +		e->inst[0] |= (1 << 25); +		alloc_reg(pc, iv); +		e->inst[0] |= (iv->hw << 9); +	} + +	emit(pc, e); +} + +static void +set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, +	 struct nv50_program_exec *e) +{ +	set_long(pc, e); +#if 1 +	e->inst[1] |= (1 << 22); +#else +	if (src->type == P_IMMD) { +		e->inst[1] |= (NV50_CB_PMISC << 22); +	} else { +		if (pc->p->type == PIPE_SHADER_VERTEX) +			e->inst[1] |= (NV50_CB_PVP << 22); +		else +			e->inst[1] |= (NV50_CB_PFP << 22); +	} +#endif + +	e->param.index = src->hw; +	e->param.shift = s; +	e->param.mask = m << (s % 32); +} + +static void +emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0x10000000; + +	set_dst(pc, dst, e); + +	if (0 && dst->type != P_RESULT && src->type == P_IMMD) { +		set_immd(pc, src, e); +		/*XXX: 32-bit, but steals part of "half" reg space - need to +		 *     catch and handle this case if/when we do half-regs +		 */ +		e->inst[0] |= 0x00008000; +	} else +	if (src->type == P_IMMD || src->type == P_CONST) { +		set_long(pc, e); +		set_data(pc, src, 0x7f, 9, e); +		e->inst[1] |= 0x20000000; /* src0 const? */ +	} else { +		if (src->type == P_ATTR) { +			set_long(pc, e); +			e->inst[1] |= 0x00200000; +		} + +		alloc_reg(pc, src); +		e->inst[0] |= (src->hw << 9); +	} + +	/* We really should support "half" instructions here at some point, +	 * but I don't feel confident enough about them yet. 
+	 */ +	set_long(pc, e); +	if (is_long(e) && !is_immd(e)) { +		e->inst[1] |= 0x04000000; /* 32-bit */ +		e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ +	} + +	emit(pc, e); +} + +static boolean +check_swap_src_0_1(struct nv50_pc *pc, +		   struct nv50_reg **s0, struct nv50_reg **s1) +{ +	struct nv50_reg *src0 = *s0, *src1 = *s1; + +	if (src0->type == P_CONST) { +		if (src1->type != P_CONST) { +			*s0 = src1; +			*s1 = src0; +			return TRUE; +		} +	} else +	if (src1->type == P_ATTR) { +		if (src0->type != P_ATTR) { +			*s0 = src1; +			*s1 = src0; +			return TRUE; +		} +	} + +	return FALSE; +} + +static void +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ +	if (src->type == P_ATTR) { +		set_long(pc, e); +		e->inst[1] |= 0x00200000; +	} else +	if (src->type == P_CONST || src->type == P_IMMD) { +		struct nv50_reg *temp = temp_temp(pc); + +		emit_mov(pc, temp, src); +		src = temp; +	} + +	alloc_reg(pc, src); +	e->inst[0] |= (src->hw << 9); +} + +static void +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ +	if (src->type == P_ATTR) { +		struct nv50_reg *temp = temp_temp(pc); + +		emit_mov(pc, temp, src); +		src = temp; +	} else +	if (src->type == P_CONST || src->type == P_IMMD) { +		assert(!(e->inst[0] & 0x00800000)); +		if (e->inst[0] & 0x01000000) { +			struct nv50_reg *temp = temp_temp(pc); + +			emit_mov(pc, temp, src); +			src = temp; +		} else { +			set_data(pc, src, 0x7f, 16, e); +			e->inst[0] |= 0x00800000; +		} +	} + +	alloc_reg(pc, src); +	e->inst[0] |= (src->hw << 16); +} + +static void +set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ +	set_long(pc, e); + +	if (src->type == P_ATTR) { +		struct nv50_reg *temp = temp_temp(pc); + +		emit_mov(pc, temp, src); +		src = temp; +	} else +	if (src->type == P_CONST || src->type == P_IMMD) { +		assert(!(e->inst[0] & 0x01000000)); +		if (e->inst[0] & 0x00800000) { +			struct nv50_reg *temp = temp_temp(pc); + +			emit_mov(pc, temp, src); +			src = temp; +		} else { +			set_data(pc, src, 0x7f, 32+14, e); +			e->inst[0] |= 0x01000000; +		} +	} + +	alloc_reg(pc, src); +	e->inst[1] |= (src->hw << 14); +} + +static void +emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, +	 struct nv50_reg *src1) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0xc0000000; +	set_long(pc, e); + +	check_swap_src_0_1(pc, &src0, &src1); +	set_dst(pc, dst, e); +	set_src_0(pc, src0, e); +	set_src_1(pc, src1, e); + +	emit(pc, e); +} + +static void +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, +	 struct nv50_reg *src0, struct nv50_reg *src1) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0xb0000000; + +	check_swap_src_0_1(pc, &src0, &src1); +	set_dst(pc, dst, e); +	set_src_0(pc, src0, e); +	if (is_long(e)) +		set_src_2(pc, src1, e); +	else +		set_src_1(pc, src1, e); + +	emit(pc, e); +} + +static void +emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, +	    struct nv50_reg *src0, struct nv50_reg *src1) +{ +	struct nv50_program_exec *e = exec(pc); + +	set_long(pc, e); +	e->inst[0] |= 0xb0000000; +	e->inst[1] |= (sub << 29); + +	check_swap_src_0_1(pc, &src0, &src1); +	set_dst(pc, dst, e); +	set_src_0(pc, src0, e); +	set_src_1(pc, src1, e); + +	emit(pc, e); +} + +static void +emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, +	 struct nv50_reg *src1) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0xb0000000; + +	set_long(pc, e); +	if 
(check_swap_src_0_1(pc, &src0, &src1)) +		e->inst[1] |= 0x04000000; +	else +		e->inst[1] |= 0x08000000; + +	set_dst(pc, dst, e); +	set_src_0(pc, src0, e); +	set_src_2(pc, src1, e); + +	emit(pc, e); +} + +static void +emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, +	 struct nv50_reg *src1, struct nv50_reg *src2) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0xe0000000; + +	check_swap_src_0_1(pc, &src0, &src1); +	set_dst(pc, dst, e); +	set_src_0(pc, src0, e); +	set_src_1(pc, src1, e); +	set_src_2(pc, src2, e); + +	emit(pc, e); +} + +static void +emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, +	 struct nv50_reg *src1, struct nv50_reg *src2) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0xe0000000; +	set_long(pc, e); +	e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + +	check_swap_src_0_1(pc, &src0, &src1); +	set_dst(pc, dst, e); +	set_src_0(pc, src0, e); +	set_src_1(pc, src1, e); +	set_src_2(pc, src2, e); + +	emit(pc, e); +} + +static void +emit_flop(struct nv50_pc *pc, unsigned sub, +	  struct nv50_reg *dst, struct nv50_reg *src) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0x90000000; +	if (sub) { +		set_long(pc, e); +		e->inst[1] |= (sub << 29); +	} + +	set_dst(pc, dst, e); +	set_src_0(pc, src, e); + +	emit(pc, e); +} + +static void +emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0xb0000000; + +	set_dst(pc, dst, e); +	set_src_0(pc, src, e); +	set_long(pc, e); +	e->inst[1] |= (6 << 29) | 0x00004000; + +	emit(pc, e); +} + +static void +emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] |= 0xb0000000; + +	set_dst(pc, dst, e); +	set_src_0(pc, src, e); +	set_long(pc, e); +	e->inst[1] |= (6 << 29); + +	emit(pc, e); +} + +static void +emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, +	 struct nv50_reg *src0, struct nv50_reg *src1) +{ +	struct nv50_program_exec *e = exec(pc); +	unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; +	struct nv50_reg *rdst; + +	assert(c_op <= 7); +	if (check_swap_src_0_1(pc, &src0, &src1)) +		c_op = inv_cop[c_op]; + +	rdst = dst; +	if (dst->type != P_TEMP) +		dst = alloc_temp(pc, NULL); + +	/* set.u32 */ +	set_long(pc, e); +	e->inst[0] |= 0xb0000000; +	e->inst[1] |= (3 << 29); +	e->inst[1] |= (c_op << 14); +	/*XXX: breaks things, .u32 by default? +	 *     decuda will disasm as .u16 and use .lo/.hi regs, but this +	 *     doesn't seem to match what the hw actually does. +	inst[1] |= 0x04000000; << breaks things.. .u32 by default? 
+	 */ +	set_dst(pc, dst, e); +	set_src_0(pc, src0, e); +	set_src_1(pc, src1, e); +	emit(pc, e); + +	/* cvt.f32.u32 */ +	e = exec(pc); +	e->inst[0] = 0xa0000001; +	e->inst[1] = 0x64014780; +	set_dst(pc, rdst, e); +	set_src_0(pc, dst, e); +	emit(pc, e); + +	if (dst != rdst) +		free_temp(pc, dst); +} + +static void +emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] = 0xa0000000; /* cvt */ +	set_long(pc, e); +	e->inst[1] |= (6 << 29); /* cvt */ +	e->inst[1] |= 0x08000000; /* integer mode */ +	e->inst[1] |= 0x04000000; /* 32 bit */ +	e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ +	e->inst[1] |= (1 << 14); /* src .f32 */ +	set_dst(pc, dst, e); +	set_src_0(pc, src, e); + +	emit(pc, e); +} + +static void +emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, +	 struct nv50_reg *v, struct nv50_reg *e) +{ +	struct nv50_reg *temp = alloc_temp(pc, NULL); + +	emit_flop(pc, 3, temp, v); +	emit_mul(pc, temp, temp, e); +	emit_preex2(pc, temp, temp); +	emit_flop(pc, 6, dst, temp); + +	free_temp(pc, temp); +} + +static void +emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	struct nv50_program_exec *e = exec(pc); + +	e->inst[0] = 0xa0000000; /* cvt */ +	set_long(pc, e); +	e->inst[1] |= (6 << 29); /* cvt */ +	e->inst[1] |= 0x04000000; /* 32 bit */ +	e->inst[1] |= (1 << 14); /* src .f32 */ +	e->inst[1] |= ((1 << 6) << 14); /* .abs */ +	set_dst(pc, dst, e); +	set_src_0(pc, src, e); + +	emit(pc, e); +} + +static void +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, +	 struct nv50_reg **src) +{ +	struct nv50_reg *one = alloc_immd(pc, 1.0); +	struct nv50_reg *zero = alloc_immd(pc, 0.0); +	struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); +	struct nv50_reg *pos128 = alloc_immd(pc,  127.999999); +	struct nv50_reg *tmp[4]; + +	if (mask & (1 << 0)) +		emit_mov(pc, dst[0], one); + +	if (mask & (1 << 3)) +		emit_mov(pc, dst[3], one); + +	if (mask & (3 << 1)) { +		if (mask & (1 << 1)) +			tmp[0] = dst[1]; +		else +			tmp[0] = temp_temp(pc); +		emit_minmax(pc, 4, tmp[0], src[0], zero); +	} + +	if (mask & (1 << 2)) { +		set_pred_wr(pc, 1, 0, pc->p->exec_tail); + +		tmp[1] = temp_temp(pc); +		emit_minmax(pc, 4, tmp[1], src[1], zero); + +		tmp[3] = temp_temp(pc); +		emit_minmax(pc, 4, tmp[3], src[3], neg128); +		emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + +		emit_pow(pc, dst[2], tmp[1], tmp[3]); +		emit_mov(pc, dst[2], zero); +		set_pred(pc, 3, 0, pc->p->exec_tail); +	} +} + +static void +emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ +	struct nv50_program_exec *e = exec(pc); + +	set_long(pc, e); +	e->inst[0] |= 0xa0000000; /* delta */ +	e->inst[1] |= (7 << 29); /* delta */ +	e->inst[1] |= 0x04000000; /* negate arg0? probably not */ +	e->inst[1] |= (1 << 14); /* src .f32 */ +	set_dst(pc, dst, e); +	set_src_0(pc, src, e); + +	emit(pc, e); +} + +static void +emit_kil(struct nv50_pc *pc, struct nv50_reg *src) +{ +	struct nv50_program_exec *e; +	const int r_pred = 1; + +	/* Sets predicate reg ? */ +	e = exec(pc); +	e->inst[0] = 0xa00001fd; +	e->inst[1] = 0xc4014788; +	set_src_0(pc, src, e); +	set_pred_wr(pc, 1, r_pred, e); +	emit(pc, e); + +	/* This is probably KILP */ +	e = exec(pc); +	e->inst[0] = 0x000001fe; +	set_long(pc, e); +	set_pred(pc, 1 /* LT? 
*/, r_pred, e); +	emit(pc, e); +} + +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ +	switch (dst->DstRegister.File) { +	case TGSI_FILE_TEMPORARY: +		return &pc->temp[dst->DstRegister.Index * 4 + c]; +	case TGSI_FILE_OUTPUT: +		return &pc->result[dst->DstRegister.Index * 4 + c]; +	case TGSI_FILE_NULL: +		return NULL; +	default: +		break; +	} + +	return NULL; +} + +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) +{ +	struct nv50_reg *r = NULL; +	struct nv50_reg *temp; +	unsigned c; + +	c = tgsi_util_get_full_src_register_extswizzle(src, chan); +	switch (c) { +	case TGSI_EXTSWIZZLE_X: +	case TGSI_EXTSWIZZLE_Y: +	case TGSI_EXTSWIZZLE_Z: +	case TGSI_EXTSWIZZLE_W: +		switch (src->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			r = &pc->attr[src->SrcRegister.Index * 4 + c]; +			break; +		case TGSI_FILE_TEMPORARY: +			r = &pc->temp[src->SrcRegister.Index * 4 + c]; +			break; +		case TGSI_FILE_CONSTANT: +			r = &pc->param[src->SrcRegister.Index * 4 + c]; +			break; +		case TGSI_FILE_IMMEDIATE: +			r = &pc->immd[src->SrcRegister.Index * 4 + c]; +			break; +		case TGSI_FILE_SAMPLER: +			break; +		default: +			assert(0); +			break; +		} +		break; +	case TGSI_EXTSWIZZLE_ZERO: +		r = alloc_immd(pc, 0.0); +		break; +	case TGSI_EXTSWIZZLE_ONE: +		r = alloc_immd(pc, 1.0); +		break; +	default: +		assert(0); +		break; +	} + +	switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { +	case TGSI_UTIL_SIGN_KEEP: +		break; +	case TGSI_UTIL_SIGN_CLEAR: +		temp = temp_temp(pc); +		emit_abs(pc, temp, r); +		r = temp; +		break; +	case TGSI_UTIL_SIGN_TOGGLE: +		temp = temp_temp(pc); +		emit_neg(pc, temp, r); +		r = temp; +		break; +	case TGSI_UTIL_SIGN_SET: +		temp = temp_temp(pc); +		emit_abs(pc, temp, r); +		emit_neg(pc, temp, r); +		r = temp; +		break; +	default: +		assert(0); +		break; +	} + +	return r; +} + +static boolean +nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +{ +	const struct tgsi_full_instruction *inst = &tok->FullInstruction; +	struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; +	unsigned mask, sat, unit; +	int i, c; + +	mask = inst->FullDstRegisters[0].DstRegister.WriteMask; +	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; + +	for (c = 0; c < 4; c++) { +		if (mask & (1 << c)) +			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); +		else +			dst[c] = NULL; +	} + +	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + +		if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) +			unit = fs->SrcRegister.Index; + +		for (c = 0; c < 4; c++) +			src[i][c] = tgsi_src(pc, c, fs); +	} + +	if (sat) { +		for (c = 0; c < 4; c++) { +			rdst[c] = dst[c]; +			dst[c] = temp_temp(pc); +		} +	} + +	switch (inst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_abs(pc, dst[c], src[0][c]); +		} +		break; +	case TGSI_OPCODE_ADD: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_add(pc, dst[c], src[0][c], src[1][c]); +		} +		break; +	case TGSI_OPCODE_COS: +		temp = alloc_temp(pc, NULL); +		emit_precossin(pc, temp, src[0][0]); +		emit_flop(pc, 5, temp, temp); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		break; +	case TGSI_OPCODE_DP3: +		temp = alloc_temp(pc, NULL); +		emit_mul(pc, temp, src[0][0], src[1][0]); +		
emit_mad(pc, temp, src[0][1], src[1][1], temp); +		emit_mad(pc, temp, src[0][2], src[1][2], temp); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		free_temp(pc, temp); +		break; +	case TGSI_OPCODE_DP4: +		temp = alloc_temp(pc, NULL); +		emit_mul(pc, temp, src[0][0], src[1][0]); +		emit_mad(pc, temp, src[0][1], src[1][1], temp); +		emit_mad(pc, temp, src[0][2], src[1][2], temp); +		emit_mad(pc, temp, src[0][3], src[1][3], temp); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		free_temp(pc, temp); +		break; +	case TGSI_OPCODE_DPH: +		temp = alloc_temp(pc, NULL); +		emit_mul(pc, temp, src[0][0], src[1][0]); +		emit_mad(pc, temp, src[0][1], src[1][1], temp); +		emit_mad(pc, temp, src[0][2], src[1][2], temp); +		emit_add(pc, temp, src[1][3], temp); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		free_temp(pc, temp); +		break; +	case TGSI_OPCODE_DST: +	{ +		struct nv50_reg *one = alloc_immd(pc, 1.0); +		if (mask & (1 << 0)) +			emit_mov(pc, dst[0], one); +		if (mask & (1 << 1)) +			emit_mul(pc, dst[1], src[0][1], src[1][1]); +		if (mask & (1 << 2)) +			emit_mov(pc, dst[2], src[0][2]); +		if (mask & (1 << 3)) +			emit_mov(pc, dst[3], src[1][3]); +		FREE(one); +	} +		break; +	case TGSI_OPCODE_EX2: +		temp = alloc_temp(pc, NULL); +		emit_preex2(pc, temp, src[0][0]); +		emit_flop(pc, 6, temp, temp); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		free_temp(pc, temp); +		break; +	case TGSI_OPCODE_FLR: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_flr(pc, dst[c], src[0][c]); +		} +		break; +	case TGSI_OPCODE_FRC: +		temp = alloc_temp(pc, NULL); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_flr(pc, temp, src[0][c]); +			emit_sub(pc, dst[c], src[0][c], temp); +		} +		free_temp(pc, temp); +		break; +	case TGSI_OPCODE_KIL: +		emit_kil(pc, src[0][0]); +		emit_kil(pc, src[0][1]); +		emit_kil(pc, src[0][2]); +		emit_kil(pc, src[0][3]); +		break; +	case TGSI_OPCODE_LIT: +		emit_lit(pc, &dst[0], mask, &src[0][0]); +		break; +	case TGSI_OPCODE_LG2: +		temp = alloc_temp(pc, NULL); +		emit_flop(pc, 3, temp, src[0][0]); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		break; +	case TGSI_OPCODE_LRP: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			/*XXX: we can do better than this */ +			temp = alloc_temp(pc, NULL); +			emit_neg(pc, temp, src[0][c]); +			emit_mad(pc, temp, temp, src[2][c], src[2][c]); +			emit_mad(pc, dst[c], src[0][c], src[1][c], temp); +			free_temp(pc, temp); +		} +		break; +	case TGSI_OPCODE_MAD: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); +		} +		break; +	case TGSI_OPCODE_MAX: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); +		} +		break; +	case TGSI_OPCODE_MIN: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); +		} +		break; +	case TGSI_OPCODE_MOV: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], src[0][c]); +		} +		break; +	case TGSI_OPCODE_MUL: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << 
c))) +				continue; +			emit_mul(pc, dst[c], src[0][c], src[1][c]); +		} +		break; +	case TGSI_OPCODE_POW: +		temp = alloc_temp(pc, NULL); +		emit_pow(pc, temp, src[0][0], src[1][0]); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		free_temp(pc, temp); +		break; +	case TGSI_OPCODE_RCP: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_flop(pc, 0, dst[c], src[0][0]); +		} +		break; +	case TGSI_OPCODE_RSQ: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_flop(pc, 2, dst[c], src[0][0]); +		} +		break; +	case TGSI_OPCODE_SCS: +		temp = alloc_temp(pc, NULL); +		emit_precossin(pc, temp, src[0][0]); +		if (mask & (1 << 0)) +			emit_flop(pc, 5, dst[0], temp); +		if (mask & (1 << 1)) +			emit_flop(pc, 4, dst[1], temp); +		break; +	case TGSI_OPCODE_SGE: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_set(pc, 6, dst[c], src[0][c], src[1][c]); +		} +		break; +	case TGSI_OPCODE_SIN: +		temp = alloc_temp(pc, NULL); +		emit_precossin(pc, temp, src[0][0]); +		emit_flop(pc, 4, temp, temp); +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_mov(pc, dst[c], temp); +		} +		break; +	case TGSI_OPCODE_SLT: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_set(pc, 1, dst[c], src[0][c], src[1][c]); +		} +		break; +	case TGSI_OPCODE_SUB: +		for (c = 0; c < 4; c++) { +			if (!(mask & (1 << c))) +				continue; +			emit_sub(pc, dst[c], src[0][c], src[1][c]); +		} +		break; +	case TGSI_OPCODE_TEX: +	case TGSI_OPCODE_TXP: +	{ +		struct nv50_reg *t[4]; +		struct nv50_program_exec *e; + +		alloc_temp4(pc, t, 0); +		emit_mov(pc, t[0], src[0][0]); +		emit_mov(pc, t[1], src[0][1]); + +		e = exec(pc); +		e->inst[0] = 0xf6400000; +		e->inst[0] |= (unit << 9); +		set_long(pc, e); +		e->inst[1] |= 0x0000c004; +		set_dst(pc, t[0], e); +		emit(pc, e); + +		if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); +		if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); +		if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); +		if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); + +		free_temp4(pc, t); +	} +		break; +	case TGSI_OPCODE_XPD: +		temp = alloc_temp(pc, NULL); +		if (mask & (1 << 0)) { +			emit_mul(pc, temp, src[0][2], src[1][1]); +			emit_msb(pc, dst[0], src[0][1], src[1][2], temp); +		} +		if (mask & (1 << 1)) { +			emit_mul(pc, temp, src[0][0], src[1][2]); +			emit_msb(pc, dst[1], src[0][2], src[1][0], temp); +		} +		if (mask & (1 << 2)) { +			emit_mul(pc, temp, src[0][1], src[1][0]); +			emit_msb(pc, dst[2], src[0][0], src[1][1], temp); +		} +		free_temp(pc, temp); +		break; +	case TGSI_OPCODE_END: +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); +		return FALSE; +	} + +	if (sat) { +		for (c = 0; c < 4; c++) { +			struct nv50_program_exec *e; + +			if (!(mask & (1 << c))) +				continue; +			e = exec(pc); + +			e->inst[0] = 0xa0000000; /* cvt */ +			set_long(pc, e); +			e->inst[1] |= (6 << 29); /* cvt */ +			e->inst[1] |= 0x04000000; /* 32 bit */ +			e->inst[1] |= (1 << 14); /* src .f32 */ +			e->inst[1] |= ((1 << 5) << 14); /* .sat */ +			set_dst(pc, rdst[c], e); +			set_src_0(pc, dst[c], e); +			emit(pc, e); +		} +	} + +	kill_temp_temp(pc); +	return TRUE; +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ +	struct tgsi_parse_context p; +	boolean ret = FALSE; +	unsigned i, c; + +	tgsi_parse_init(&p, pc->p->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const 
union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch (tok->Token.Type) { +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			const struct tgsi_full_immediate *imm = +				&p.FullToken.FullImmediate; + +			ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, +				      imm->u.ImmediateFloat32[1].Float, +				      imm->u.ImmediateFloat32[2].Float, +				      imm->u.ImmediateFloat32[3].Float); +		} +			break; +		case TGSI_TOKEN_TYPE_DECLARATION: +		{ +			const struct tgsi_full_declaration *d; +			unsigned last; + +			d = &p.FullToken.FullDeclaration; +			last = d->DeclarationRange.Last; + +			switch (d->Declaration.File) { +			case TGSI_FILE_TEMPORARY: +				if (pc->temp_nr < (last + 1)) +					pc->temp_nr = last + 1; +				break; +			case TGSI_FILE_OUTPUT: +				if (pc->result_nr < (last + 1)) +					pc->result_nr = last + 1; +				break; +			case TGSI_FILE_INPUT: +				if (pc->attr_nr < (last + 1)) +					pc->attr_nr = last + 1; +				break; +			case TGSI_FILE_CONSTANT: +				if (pc->param_nr < (last + 1)) +					pc->param_nr = last + 1; +				break; +			case TGSI_FILE_SAMPLER: +				break; +			default: +				NOUVEAU_ERR("bad decl file %d\n", +					    d->Declaration.File); +				goto out_err; +			} +		} +			break; +		case TGSI_TOKEN_TYPE_INSTRUCTION: +			break; +		default: +			break; +		} +	} + +	if (pc->temp_nr) { +		pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); +		if (!pc->temp) +			goto out_err; + +		for (i = 0; i < pc->temp_nr; i++) { +			for (c = 0; c < 4; c++) { +				pc->temp[i*4+c].type = P_TEMP; +				pc->temp[i*4+c].hw = -1; +				pc->temp[i*4+c].index = i; +			} +		} +	} + +	if (pc->attr_nr) { +		struct nv50_reg *iv = NULL; +		int aid = 0; + +		pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); +		if (!pc->attr) +			goto out_err; + +		if (pc->p->type == PIPE_SHADER_FRAGMENT) { +			iv = alloc_temp(pc, NULL); +			emit_interp(pc, iv, iv, NULL); +			emit_flop(pc, 0, iv, iv); +			aid++; +		} + +		for (i = 0; i < pc->attr_nr; i++) { +			struct nv50_reg *a = &pc->attr[i*4]; + +			for (c = 0; c < 4; c++) { +				if (pc->p->type == PIPE_SHADER_FRAGMENT) { +					struct nv50_reg *at = +						alloc_temp(pc, NULL); +					pc->attr[i*4+c].type = at->type; +					pc->attr[i*4+c].hw = at->hw; +					pc->attr[i*4+c].index = at->index; +				} else { +					pc->p->cfg.vp.attr[aid/32] |= +						(1 << (aid % 32)); +					pc->attr[i*4+c].type = P_ATTR; +					pc->attr[i*4+c].hw = aid++; +					pc->attr[i*4+c].index = i; +				} +			} + +			if (pc->p->type != PIPE_SHADER_FRAGMENT) +				continue; + +			emit_interp(pc, &a[0], &a[0], iv); +			emit_interp(pc, &a[1], &a[1], iv); +			emit_interp(pc, &a[2], &a[2], iv); +			emit_interp(pc, &a[3], &a[3], iv); +		} + +		if (iv) +			free_temp(pc, iv); +	} + +	if (pc->result_nr) { +		int rid = 0; + +		pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); +		if (!pc->result) +			goto out_err; + +		for (i = 0; i < pc->result_nr; i++) { +			for (c = 0; c < 4; c++) { +				if (pc->p->type == PIPE_SHADER_FRAGMENT) { +					pc->result[i*4+c].type = P_TEMP; +					pc->result[i*4+c].hw = -1; +				} else { +					pc->result[i*4+c].type = P_RESULT; +					pc->result[i*4+c].hw = rid++; +				} +				pc->result[i*4+c].index = i; +			} +		} +	} + +	if (pc->param_nr) { +		int rid = 0; + +		pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); +		if (!pc->param) +			goto out_err; + +		for (i = 0; i < pc->param_nr; i++) { +			for (c = 0; c < 4; c++) { +				pc->param[i*4+c].type = P_CONST; +				pc->param[i*4+c].hw = rid++; +				pc->param[i*4+c].index = i; 
+			} +		} +	} + +	if (pc->immd_nr) { +		int rid = pc->param_nr * 4; + +		pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); +		if (!pc->immd) +			goto out_err; + +		for (i = 0; i < pc->immd_nr; i++) { +			for (c = 0; c < 4; c++) { +				pc->immd[i*4+c].type = P_IMMD; +				pc->immd[i*4+c].hw = rid++; +				pc->immd[i*4+c].index = i; +			} +		} +	} + +	ret = TRUE; +out_err: +	tgsi_parse_free(&p); +	return ret; +} + +static boolean +nv50_program_tx(struct nv50_program *p) +{ +	struct tgsi_parse_context parse; +	struct nv50_pc *pc; +	boolean ret; + +	pc = CALLOC_STRUCT(nv50_pc); +	if (!pc) +		return FALSE; +	pc->p = p; +	pc->p->cfg.high_temp = 4; + +	ret = nv50_program_tx_prep(pc); +	if (ret == FALSE) +		goto out_cleanup; + +	tgsi_parse_init(&parse, pc->p->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&parse)) { +		const union tgsi_full_token *tok = &parse.FullToken; + +		tgsi_parse_token(&parse); + +		switch (tok->Token.Type) { +		case TGSI_TOKEN_TYPE_INSTRUCTION: +			ret = nv50_program_tx_insn(pc, tok); +			if (ret == FALSE) +				goto out_err; +			break; +		default: +			break; +		} +	} + +	if (p->type == PIPE_SHADER_FRAGMENT) { +		struct nv50_reg out; + +		out.type = P_TEMP; +		for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) +			emit_mov(pc, &out, &pc->result[out.hw]); +	} + +	assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); +	pc->p->exec_tail->inst[1] |= 0x00000001; + +	p->param_nr = pc->param_nr * 4; +	p->immd_nr = pc->immd_nr * 4; +	p->immd = pc->immd_buf; + +out_err: +	tgsi_parse_free(&parse); + +out_cleanup: +	return ret; +} + +static void +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) +{ +	if (nv50_program_tx(p) == FALSE) +		assert(0); +	p->translated = TRUE; +} + +static void +nv50_program_upload_data(struct nv50_context *nv50, float *map, +			 unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; + +	while (count) { +		unsigned nr = count > 2047 ? 
2047 : count; + +		BEGIN_RING(chan, tesla, 0x00000f00, 1); +		OUT_RING  (chan, (NV50_CB_PMISC << 0) | (start << 8)); +		BEGIN_RING(chan, tesla, 0x40000f04, nr); +		OUT_RINGp (chan, map, nr); + +		map += nr; +		start += nr; +		count -= nr; +	} +} + +static void +nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) +{ +	struct nouveau_winsys *nvws = nv50->screen->nvws; +	struct pipe_winsys *ws = nv50->pipe.winsys; +	unsigned nr = p->param_nr + p->immd_nr; + +	if (!p->data && nr) { +		struct nouveau_resource *heap = nv50->screen->vp_data_heap; + +		if (nvws->res_alloc(heap, nr, p, &p->data)) { +			while (heap->next && heap->size < nr) { +				struct nv50_program *evict = heap->next->priv; +				nvws->res_free(&evict->data); +			} + +			if (nvws->res_alloc(heap, nr, p, &p->data)) +				assert(0); +		} +	} + +	if (p->param_nr) { +		float *map = ws->buffer_map(ws, nv50->constbuf[p->type], +					    PIPE_BUFFER_USAGE_CPU_READ); +		nv50_program_upload_data(nv50, map, p->data->start, +					 p->param_nr); +		ws->buffer_unmap(ws, nv50->constbuf[p->type]); +	} + +	if (p->immd_nr) { +		nv50_program_upload_data(nv50, p->immd, +					 p->data->start + p->param_nr, +					 p->immd_nr); +	} +} + +static void +nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) +{ +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct pipe_winsys *ws = nv50->pipe.winsys; +	struct nv50_program_exec *e; +	struct nouveau_stateobj *so; +	const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; +	unsigned start, count, *up, *ptr; +	boolean upload = FALSE; + +	if (!p->buffer) { +		p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); +		upload = TRUE; +	} + +	if (p->data && p->data->start != p->data_start) { +		for (e = p->exec_head; e; e = e->next) { +			unsigned ei, ci; + +			if (e->param.index < 0) +				continue; +			ei = e->param.shift >> 5; +			ci = e->param.index + p->data->start; + +			e->inst[ei] &= ~e->param.mask; +			e->inst[ei] |= (ci << e->param.shift); +		} + +		p->data_start = p->data->start; +		upload = TRUE; +	} + +	if (!upload) +		return; + +#ifdef NV50_PROGRAM_DUMP +	NOUVEAU_ERR("-------\n"); +	up = ptr = MALLOC(p->exec_size * 4); +	for (e = p->exec_head; e; e = e->next) { +		NOUVEAU_ERR("0x%08x\n", e->inst[0]); +		if (is_long(e)) +			NOUVEAU_ERR("0x%08x\n", e->inst[1]); +	} + +#endif + +	up = ptr = MALLOC(p->exec_size * 4); +	for (e = p->exec_head; e; e = e->next) { +		*(ptr++) = e->inst[0]; +		if (is_long(e)) +			*(ptr++) = e->inst[1]; +	} + +	so = so_new(4,2); +	so_method(so, nv50->screen->tesla, 0x1280, 3); +	so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); +	so_data  (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); + +	start = 0; count = p->exec_size; +	while (count) { +		struct nouveau_winsys *nvws = nv50->screen->nvws; +		unsigned nr; + +		so_emit(nvws, so); + +		nr = MIN2(count, 2047); +		nr = MIN2(nvws->channel->pushbuf->remaining, nr); +		if (nvws->channel->pushbuf->remaining < (nr + 3)) { +			FIRE_RING(chan); +			continue; +		} + +		BEGIN_RING(chan, tesla, 0x0f00, 1); +		OUT_RING  (chan, (start << 8) | NV50_CB_PUPLOAD); +		BEGIN_RING(chan, tesla, 0x40000f04, nr);	 +		OUT_RINGp (chan, up + start, nr); + +		start += nr; +		count -= nr; +	} + +	FREE(up); +	so_ref(NULL, &so); +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct 
nv50_program *p = nv50->vertprog; +	struct nouveau_stateobj *so; + +	if (!p->translated) { +		nv50_program_validate(nv50, p); +		if (!p->translated) +			assert(0); +	} + +	nv50_program_validate_data(nv50, p); +	nv50_program_validate_code(nv50, p); + +	so = so_new(13, 2); +	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); +	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | +		  NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | +		  NOUVEAU_BO_LOW, 0, 0); +	so_method(so, tesla, 0x1650, 2); +	so_data  (so, p->cfg.vp.attr[0]); +	so_data  (so, p->cfg.vp.attr[1]); +	so_method(so, tesla, 0x16b8, 1); +	so_data  (so, p->cfg.high_result); +	so_method(so, tesla, 0x16ac, 2); +	so_data  (so, p->cfg.high_result); //8); +	so_data  (so, p->cfg.high_temp); +	so_method(so, tesla, 0x140c, 1); +	so_data  (so, 0); /* program start offset */ +	so_ref(so, &nv50->state.vertprog); +} + +void +nv50_fragprog_validate(struct nv50_context *nv50) +{ +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nv50_program *p = nv50->fragprog; +	struct nouveau_stateobj *so; + +	if (!p->translated) { +		nv50_program_validate(nv50, p); +		if (!p->translated) +			assert(0); +	} + +	nv50_program_validate_data(nv50, p); +	nv50_program_validate_code(nv50, p); + +	so = so_new(64, 2); +	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); +	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | +		  NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | +		  NOUVEAU_BO_LOW, 0, 0); +	so_method(so, tesla, 0x1904, 4); +	so_data  (so, 0x00040404); /* p: 0x01000404 */ +	so_data  (so, 0x00000004); +	so_data  (so, 0x00000000); +	so_data  (so, 0x00000000); +	so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ +	so_data  (so, 0x03020100); +	so_data  (so, 0x07060504); +	so_data  (so, 0x0b0a0908); +	so_method(so, tesla, 0x1988, 2); +	so_data  (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ +	so_data  (so, p->cfg.high_temp); +	so_method(so, tesla, 0x1414, 1); +	so_data  (so, 0); /* program start offset */ +	so_ref(so, &nv50->state.fragprog); +} + +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ +	struct pipe_screen *pscreen = nv50->pipe.screen; + +	while (p->exec_head) { +		struct nv50_program_exec *e = p->exec_head; + +		p->exec_head = e->next; +		FREE(e); +	} +	p->exec_tail = NULL; +	p->exec_size = 0; + +	if (p->buffer) +		pipe_buffer_reference(pscreen, &p->buffer, NULL); + +	nv50->screen->nvws->res_free(&p->data); + +	p->translated = 0; +} + diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h new file mode 100644 index 0000000000..78deed6a38 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -0,0 +1,45 @@ +#ifndef __NV50_PROGRAM_H__ +#define __NV50_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv50_program_exec { +	struct nv50_program_exec *next; + +	unsigned inst[2]; +	struct { +		int index; +		unsigned mask; +		unsigned shift; +	} param; +}; + +struct nv50_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; +	boolean translated; + +	unsigned type; +	struct nv50_program_exec *exec_head; +	struct nv50_program_exec *exec_tail; +	unsigned exec_size; +	struct nouveau_resource *data; +	unsigned data_start; + +	struct pipe_buffer *buffer; + +	float *immd; +	unsigned immd_nr; +	unsigned param_nr; + +	struct { +		unsigned high_temp; +		unsigned high_result; +		struct { +			unsigned attr[2]; +		} vp; +	} cfg; +}; 
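+
+/* Rough notes on how the above is consumed by nv50_program.c:
+ *
+ * - Each nv50_program_exec holds one opcode; bit 0 of inst[0] marks the
+ *   "long" two-word encoding, which is why emit() grows exec_size by
+ *   either 1 or 2.
+ *
+ * - param records where an index into the program's constant/immediate
+ *   data block was baked into the opcode.  When that block is (re)placed
+ *   in the heap, nv50_program_validate_code() patches the affected
+ *   instruction word (param.shift selects word and bit position,
+ *   param.mask the field) before re-uploading the flattened code.
+ */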
+ +#endif diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c new file mode 100644 index 0000000000..20745ceab8 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -0,0 +1,134 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + +struct nv50_query { +	struct pipe_buffer *buffer; +	unsigned type; +	boolean ready; +	uint64_t result; +}; + +static INLINE struct nv50_query * +nv50_query(struct pipe_query *pipe) +{ +	return (struct nv50_query *)pipe; +} + +static struct pipe_query * +nv50_query_create(struct pipe_context *pipe, unsigned type) +{ +	struct pipe_winsys *ws = pipe->winsys; +	struct nv50_query *q = CALLOC_STRUCT(nv50_query); + +	assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); +	q->type = type; + +	q->buffer = ws->buffer_create(ws, 256, 0, 16); +	if (!q->buffer) { +		FREE(q); +		return NULL; +	} + +	return (struct pipe_query *)q; +} + +static void +nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv50_query *q = nv50_query(pq); + +	if (q) { +		pipe_buffer_reference(pipe->screen, &q->buffer, NULL); +		FREE(q); +	} +} + +static void +nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nv50_query *q = nv50_query(pq); + +	BEGIN_RING(chan, tesla, 0x1530, 1); +	OUT_RING  (chan, 1); +	BEGIN_RING(chan, tesla, 0x1514, 1); +	OUT_RING  (chan, 1); + +	q->ready = FALSE; +} + +static void +nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nv50_query *q = nv50_query(pq); + +	WAIT_RING (chan, 5); +	BEGIN_RING(chan, tesla, 0x1b00, 4); +	OUT_RELOCh(chan, q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RELOCl(chan, q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); +	OUT_RING  (chan, 0x00000000); +	OUT_RING  (chan, 0x0100f002); +	FIRE_RING (chan); +} + +static boolean +nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, +		  boolean wait, uint64_t *result) +{ +	struct pipe_winsys *ws = pipe->winsys; +	struct nv50_query *q = nv50_query(pq); + +	/*XXX: Want to be able to return FALSE here instead 
of blocking +	 *     until the result is available.. +	 */ + +	if (!q->ready) { +		uint32_t *map = ws->buffer_map(ws, q->buffer, +					       PIPE_BUFFER_USAGE_CPU_READ); +		q->result = map[1]; +		q->ready = TRUE; +		ws->buffer_unmap(ws, q->buffer); +	} + +	*result = q->result; +	return q->ready; +} + +void +nv50_init_query_functions(struct nv50_context *nv50) +{ +	nv50->pipe.create_query = nv50_query_create; +	nv50->pipe.destroy_query = nv50_query_destroy; +	nv50->pipe.begin_query = nv50_query_begin; +	nv50->pipe.end_query = nv50_query_end; +	nv50->pipe.get_query_result = nv50_query_result; +} diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c new file mode 100644 index 0000000000..58d7a621a8 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -0,0 +1,356 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_screen.h" + +#include "util/u_simple_screen.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +#include "nouveau/nouveau_stateobj.h" + +#define NV5X_GRCLASS5097_CHIPSETS 0x00000001 +#define NV8X_GRCLASS8297_CHIPSETS 0x00000050 +#define NV9X_GRCLASS8297_CHIPSETS 0x00000014 + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, +				enum pipe_format format, +				enum pipe_texture_target target, +				unsigned tex_usage, unsigned geom_flags) +{ +	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM: +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +			return TRUE; +		default: +			break; +		} +	} else { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_A1R5G5B5_UNORM: +		case PIPE_FORMAT_A4R4G4B4_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM: +		case PIPE_FORMAT_L8_UNORM: +		case PIPE_FORMAT_A8_UNORM: +		case PIPE_FORMAT_I8_UNORM: +		case PIPE_FORMAT_A8L8_UNORM: +		case PIPE_FORMAT_DXT1_RGB: +		case PIPE_FORMAT_DXT1_RGBA: +		case PIPE_FORMAT_DXT3_RGBA: +		case PIPE_FORMAT_DXT5_RGBA: +			return TRUE; +		default: +			break; +		} +	} + +	return FALSE; +} + +static const char * +nv50_screen_get_name(struct pipe_screen *pscreen) +{ +	struct nv50_screen *screen = nv50_screen(pscreen); +	struct nouveau_device *dev = screen->nvws->channel->device; +	static char buffer[128]; + +	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); +	return buffer; +} + +static const char * +nv50_screen_get_vendor(struct pipe_screen *pscreen) +{ +	return "nouveau"; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +		return 32; +	case PIPE_CAP_NPOT_TEXTURES: +		return 1; +	case PIPE_CAP_TWO_SIDED_STENCIL: +		return 1; +	case PIPE_CAP_GLSL: +		return 0; +	case PIPE_CAP_S3TC: +		return 1; +	case PIPE_CAP_ANISOTROPIC_FILTER: +		return 1; +	case PIPE_CAP_POINT_SPRITE: +		return 0; +	case PIPE_CAP_MAX_RENDER_TARGETS: +		return 8; +	case PIPE_CAP_OCCLUSION_QUERY: +		return 1; +	case PIPE_CAP_TEXTURE_SHADOW_MAP: +		return 1; +	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +		return 13; +	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +		return 10; +	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +		return 13; +	case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +	case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +		return 1; +	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +		return 0; +	case NOUVEAU_CAP_HW_VTXBUF:	 +		return 1; +	case NOUVEAU_CAP_HW_IDXBUF:	 +		return 0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0; +	} +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_LINE_WIDTH: +	case PIPE_CAP_MAX_LINE_WIDTH_AA: +		return 10.0; +	case PIPE_CAP_MAX_POINT_WIDTH: +	case PIPE_CAP_MAX_POINT_WIDTH_AA: +		return 64.0; +	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +		return 16.0; +	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +		return 4.0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0.0; +	} +} + +static void +nv50_screen_destroy(struct pipe_screen *pscreen) +{ +	FREE(pscreen); +} + +struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ +	struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); +	struct nouveau_stateobj *so; +	unsigned tesla_class = 0, ret; +	unsigned chipset = nvws->channel->device->chipset; +	int i; + +	if (!screen) +		return NULL; +	
screen->nvws = nvws; + +	/* 2D object */ +	ret = nvws->grobj_alloc(nvws, NV50_2D, &screen->eng2d); +	if (ret) { +		NOUVEAU_ERR("Error creating 2D object: %d\n", ret); +		nv50_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* 3D object */ +	if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { +		NOUVEAU_ERR("Not a G8x chipset\n"); +		nv50_screen_destroy(&screen->pipe); +		return NULL; +	} + +	switch (chipset & 0xf0) { +	case 0x50: +		if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) +			tesla_class = 0x5097; +		break; +	case 0x80: +		if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) +			tesla_class = 0x8297; +		break; +	case 0x90: +		if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) +			tesla_class = 0x8297; +		break; +	default: +		break; +	} + +	if (tesla_class == 0) { +		NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); +		nv50_screen_destroy(&screen->pipe); +		return NULL; +	} + +	ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla); +	if (ret) { +		NOUVEAU_ERR("Error creating 3D object: %d\n", ret); +		nv50_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Sync notifier */ +	ret = nvws->notifier_alloc(nvws, 1, &screen->sync); +	if (ret) { +		NOUVEAU_ERR("Error creating notifier object: %d\n", ret); +		nv50_screen_destroy(&screen->pipe); +		return NULL; +	} + +	/* Static 2D init */ +	so = so_new(64, 0); +	so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); +	so_data  (so, screen->sync->handle); +	so_data  (so, screen->nvws->channel->vram->handle); +	so_data  (so, screen->nvws->channel->vram->handle); +	so_data  (so, screen->nvws->channel->vram->handle); +	so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); +	so_data  (so, NV50_2D_OPERATION_SRCCOPY); +	so_method(so, screen->eng2d, 0x0290, 1); +	so_data  (so, 0); +	so_method(so, screen->eng2d, 0x0888, 1); +	so_data  (so, 1); +	so_emit(nvws, so); +	so_ref(NULL, &so); + +	/* Static tesla init */ +	so = so_new(256, 20); + +	so_method(so, screen->tesla, 0x1558, 1); +	so_data  (so, 1); +	so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); +	so_data  (so, screen->sync->handle); +	so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), +				     NV50TCL_DMA_UNK0__SIZE); +	for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) +		so_data(so, nvws->channel->vram->handle); +	so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), +				     NV50TCL_DMA_UNK1__SIZE); +	for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) +		so_data(so, nvws->channel->vram->handle); +	so_method(so, screen->tesla, 0x121c, 1); +	so_data  (so, 1); + +	so_method(so, screen->tesla, 0x13bc, 1); +	so_data  (so, 0x54); +	so_method(so, screen->tesla, 0x13ac, 1); +	so_data  (so, 1); +	so_method(so, screen->tesla, 0x16b8, 1); +	so_data  (so, 8); + +	/* Shared constant buffer */ +	screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); +	if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { +		NOUVEAU_ERR("Error initialising constant buffer\n"); +		nv50_screen_destroy(&screen->pipe); +		return NULL; +	} + +	so_method(so, screen->tesla, 0x1280, 3); +	so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); +	so_data  (so, (NV50_CB_PMISC << 16) | 0x00001000); + +	/* Texture sampler/image unit setup - we abuse the constant buffer +	 * upload mechanism for the moment to upload data to the tex config +	 * blocks.  At some point we *may* want to go the NVIDIA way of doing +	 * things? 
+	 */ +	screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); +	so_method(so, screen->tesla, 0x1280, 3); +	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); +	so_data  (so, (NV50_CB_TIC << 16) | 0x0800); +	so_method(so, screen->tesla, 0x1574, 3); +	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); +	so_data  (so, 0x00000800); + +	screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); +	so_method(so, screen->tesla, 0x1280, 3); +	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); +	so_data  (so, (NV50_CB_TSC << 16) | 0x0800); +	so_method(so, screen->tesla, 0x155c, 3); +	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); +	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); +	so_data  (so, 0x00000800); + + +	/* Vertex array limits - max them out */ +	for (i = 0; i < 16; i++) { +		so_method(so, screen->tesla, 0x1080 + (i * 8), 2); +		so_data  (so, 0x000000ff); +		so_data  (so, 0xffffffff); +	} + +	so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); +	so_data  (so, fui(0.0)); +	so_data  (so, fui(1.0)); + +	so_method(so, screen->tesla, 0x1234, 1); +	so_data  (so, 1); +	so_method(so, screen->tesla, 0x1458, 1); +	so_data  (so, 1); + +	so_emit(nvws, so); +	so_ref(so, &screen->static_init); +	nvws->push_flush(nvws, 0, NULL); + +	screen->pipe.winsys = ws; + +	screen->pipe.destroy = nv50_screen_destroy; + +	screen->pipe.get_name = nv50_screen_get_name; +	screen->pipe.get_vendor = nv50_screen_get_vendor; +	screen->pipe.get_param = nv50_screen_get_param; +	screen->pipe.get_paramf = nv50_screen_get_paramf; + +	screen->pipe.is_format_supported = nv50_screen_is_format_supported; + +	nv50_screen_init_miptree_functions(&screen->pipe); +	nv50_surface_init_screen_functions(&screen->pipe); +	u_simple_screen_init(&screen->pipe); + +	return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h new file mode 100644 index 0000000000..c888ca071c --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -0,0 +1,34 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv50_screen { +	struct pipe_screen pipe; + +	struct nouveau_winsys *nvws; + +	unsigned cur_pctx; + +	struct nouveau_grobj *tesla; +	struct nouveau_grobj *eng2d; +	struct nouveau_notifier *sync; + +	struct pipe_buffer *constbuf; +	struct nouveau_resource *vp_data_heap; + +	struct pipe_buffer *tic; +	struct pipe_buffer *tsc; + +	struct nouveau_stateobj *static_init; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ +	return (struct nv50_screen *)screen; +} + +void nv50_surface_init_screen_functions(struct pipe_screen *); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c new file mode 100644 index 0000000000..787ff958ec --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -0,0 +1,664 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static void * +nv50_blend_state_create(struct pipe_context *pipe, +			const struct pipe_blend_state *cso) +{ +	struct nouveau_stateobj *so = so_new(64, 0); +	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; +	struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); +	unsigned cmask = 0, i; + +	/*XXX ignored: +	 * 	- dither +	 */ + +	if (cso->blend_enable == 0) { +		so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); +		for (i = 0; i < 8; i++) +			so_data(so, 0); +	} else { +		so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); +		for (i = 0; i < 8; i++) +			so_data(so, 1); +		so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); +		so_data  (so, nvgl_blend_eqn(cso->rgb_func)); +		so_data  (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor)); +		so_data  (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor)); +		so_data  (so, nvgl_blend_eqn(cso->alpha_func)); +		so_data  (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor)); +		so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); +		so_data  (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor)); +	} + +	if (cso->logicop_enable == 0 ) { +		so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1); +		so_data  (so, 0); +	} else { +		so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2); +		so_data  (so, 1); +		so_data  (so, nvgl_logicop_func(cso->logicop_func)); +	} + +	if (cso->colormask & PIPE_MASK_R) +		cmask |= (1 << 0); +	if (cso->colormask & PIPE_MASK_G) +		cmask |= (1 << 4); +	if (cso->colormask & PIPE_MASK_B) +		cmask |= (1 << 8); +	if (cso->colormask & PIPE_MASK_A) +		cmask |= (1 << 12); +	so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8); +	for (i = 0; i < 8; i++) +		so_data(so, cmask); + +	bso->pipe = *cso; +	so_ref(so, &bso->so); +	return (void *)bso; +} + +static void +nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->blend = hwcso; +	nv50->dirty |= NV50_NEW_BLEND; +} + +static void +nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_blend_stateobj *bso = hwcso; + +	so_ref(NULL, &bso->so); +	FREE(bso); +} + +static INLINE unsigned +wrap_mode(unsigned wrap) +{ +	switch (wrap) { +	case PIPE_TEX_WRAP_REPEAT: +		return NV50TSC_1_0_WRAPS_REPEAT; +	case PIPE_TEX_WRAP_MIRROR_REPEAT: +		return NV50TSC_1_0_WRAPS_MIRROR_REPEAT; +	case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +		return 
NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE; +	case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +		return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER; +	case PIPE_TEX_WRAP_CLAMP: +		return NV50TSC_1_0_WRAPS_CLAMP; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +		return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +		return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER; +	case PIPE_TEX_WRAP_MIRROR_CLAMP: +		return NV50TSC_1_0_WRAPS_MIRROR_CLAMP; +	default: +		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +		return NV50TSC_1_0_WRAPS_REPEAT; +	} +} +static void * +nv50_sampler_state_create(struct pipe_context *pipe, +			  const struct pipe_sampler_state *cso) +{ +	unsigned *tsc = CALLOC(8, sizeof(unsigned)); + +	tsc[0] = (0x00024000 | +		  (wrap_mode(cso->wrap_s) << 0) | +		  (wrap_mode(cso->wrap_t) << 3) | +		  (wrap_mode(cso->wrap_r) << 6)); + +	switch (cso->mag_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		tsc[1] |= NV50TSC_1_1_MAGF_NEAREST; +		break; +	} + +	switch (cso->min_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		tsc[1] |= NV50TSC_1_1_MINF_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		tsc[1] |= NV50TSC_1_1_MINF_NEAREST; +		break; +	} + +	switch (cso->min_mip_filter) { +	case PIPE_TEX_MIPFILTER_LINEAR: +		tsc[1] |= NV50TSC_1_1_MIPF_LINEAR; +		break; +	case PIPE_TEX_MIPFILTER_NEAREST: +		tsc[1] |= NV50TSC_1_1_MIPF_NEAREST; +		break; +	case PIPE_TEX_MIPFILTER_NONE: +	default: +		tsc[1] |= NV50TSC_1_1_MIPF_NONE; +		break; +	} + +	if (cso->max_anisotropy >= 16.0) +		tsc[0] |= (7 << 20); +	else +	if (cso->max_anisotropy >= 12.0) +		tsc[0] |= (6 << 20); +	else +	if (cso->max_anisotropy >= 10.0) +		tsc[0] |= (5 << 20); +	else +	if (cso->max_anisotropy >= 8.0) +		tsc[0] |= (4 << 20); +	else +	if (cso->max_anisotropy >= 6.0) +		tsc[0] |= (3 << 20); +	else +	if (cso->max_anisotropy >= 4.0) +		tsc[0] |= (2 << 20); +	else +	if (cso->max_anisotropy >= 2.0) +		tsc[0] |= (1 << 20); + +	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +		tsc[0] |= (1 << 8); +		tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7); +	} + +	return (void *)tsc; +} + +static void +nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	int i; + +	nv50->sampler_nr = nr; +	for (i = 0; i < nv50->sampler_nr; i++) +		nv50->sampler[i] = sampler[i]; + +	nv50->dirty |= NV50_NEW_SAMPLER; +} + +static void +nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void +nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, +			 struct pipe_texture **pt) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	int i; + +	for (i = 0; i < nr; i++) +		pipe_texture_reference((void *)&nv50->miptree[i], pt[i]); +	for (i = nr; i < nv50->miptree_nr; i++) +		pipe_texture_reference((void *)&nv50->miptree[i], NULL); + +	nv50->miptree_nr = nr; +	nv50->dirty |= NV50_NEW_TEXTURE; +} + +static void * +nv50_rasterizer_state_create(struct pipe_context *pipe, +			     const struct pipe_rasterizer_state *cso) +{ +	struct nouveau_stateobj *so = so_new(64, 0); +	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; +	struct nv50_rasterizer_stateobj *rso = +		CALLOC_STRUCT(nv50_rasterizer_stateobj); + +	/*XXX: ignored +	 * 	- light_twosize +	 * 	- point_smooth +	 * 	- multisample +	 * 	- point_sprite / sprite_coord_mode +	 */ + +	so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); +	so_data  (so, 
cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : +				       NV50TCL_SHADE_MODEL_SMOOTH); + +	so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); +	so_data  (so, fui(cso->line_width)); +	so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); +	so_data  (so, cso->line_smooth ? 1 : 0); +	if (cso->line_stipple_enable) { +		so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); +		so_data  (so, 1); +		so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1); +		so_data  (so, (cso->line_stipple_pattern << 8) | +			       cso->line_stipple_factor); +	} else { +		so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_method(so, tesla, NV50TCL_POINT_SIZE, 1); +	so_data  (so, fui(cso->point_size)); + +	so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); +	if (cso->front_winding == PIPE_WINDING_CCW) { +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +	} else { +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +	} +	so_data(so, cso->poly_smooth ? 1 : 0); + +	so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3); +	so_data  (so, cso->cull_mode != PIPE_WINDING_NONE); +	if (cso->front_winding == PIPE_WINDING_CCW) { +		so_data(so, NV50TCL_FRONT_FACE_CCW); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV50TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV50TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV50TCL_CULL_FACE_BACK); +			break; +		} +	} else { +		so_data(so, NV50TCL_FRONT_FACE_CW); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV50TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV50TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV50TCL_CULL_FACE_BACK); +			break; +		} +	} + +	so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1); +	so_data  (so, cso->poly_stipple_enable ? 
1 : 0); + +	so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) +		so_data(so, 1); +	else +		so_data(so, 0); + +	if (cso->offset_cw || cso->offset_ccw) { +		so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); +		so_data  (so, fui(cso->offset_scale)); +		so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); +		so_data  (so, fui(cso->offset_units)); +	} + +	rso->pipe = *cso; +	so_ref(so, &rso->so); +	return (void *)rso; +} + +static void +nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->rasterizer = hwcso; +	nv50->dirty |= NV50_NEW_RASTERIZER; +} + +static void +nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_rasterizer_stateobj *rso = hwcso; + +	so_ref(NULL, &rso->so); +	FREE(rso); +} + +static void * +nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, +			const struct pipe_depth_stencil_alpha_state *cso) +{ +	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; +	struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); +	struct nouveau_stateobj *so = so_new(64, 0); + +	so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); +	so_data  (so, cso->depth.writemask ? 1 : 0); +	if (cso->depth.enabled) { +		so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); +		so_data  (so, 1); +		so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); +		so_data  (so, nvgl_comparison_op(cso->depth.func)); +	} else { +		so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); +		so_data  (so, 0); +	} + +	/*XXX: yes, I know they're backwards.. 
header needs fixing */ +	if (cso->stencil[0].enabled) { +		so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); +		so_data  (so, 1); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); +		so_data  (so, nvgl_comparison_op(cso->stencil[0].func)); +		so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); +		so_data  (so, cso->stencil[0].ref_value); +		so_data  (so, cso->stencil[0].writemask); +		so_data  (so, cso->stencil[0].valuemask); +	} else { +		so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); +		so_data  (so, 0); +	} + +	if (cso->stencil[1].enabled) { +		so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); +		so_data  (so, 1); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); +		so_data  (so, nvgl_comparison_op(cso->stencil[1].func)); +		so_data  (so, cso->stencil[1].ref_value); +		so_data  (so, cso->stencil[1].writemask); +		so_data  (so, cso->stencil[1].valuemask); +	} else { +		so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); +		so_data  (so, 0); +	} + +	if (cso->alpha.enabled) { +		so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); +		so_data  (so, 1); +		so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2); +		so_data  (so, fui(cso->alpha.ref_value)); +		so_data  (so, nvgl_comparison_op(cso->alpha.func)); +	} else { +		so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); +		so_data  (so, 0); +	} + +	zsa->pipe = *cso; +	so_ref(so, &zsa->so); +	return (void *)zsa; +} + +static void +nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->zsa = hwcso; +	nv50->dirty |= NV50_NEW_ZSA; +} + +static void +nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_zsa_stateobj *zsa = hwcso; + +	so_ref(NULL, &zsa->so); +	FREE(zsa); +} + +static void * +nv50_vp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv50_program *p = CALLOC_STRUCT(nv50_program); + +	p->pipe.tokens = tgsi_dup_tokens(cso->tokens); +	p->type = PIPE_SHADER_VERTEX; +	tgsi_scan_shader(p->pipe.tokens, &p->info); +	return (void *)p; +} + +static void +nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->vertprog = hwcso; +	nv50->dirty |= NV50_NEW_VERTPROG; +} + +static void +nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	struct nv50_program *p = hwcso; + +	nv50_program_destroy(nv50, p); +	FREE((void*)p->pipe.tokens); +	FREE(p); +} + +static void * +nv50_fp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv50_program *p = CALLOC_STRUCT(nv50_program); + +	p->pipe.tokens = tgsi_dup_tokens(cso->tokens); +	p->type = PIPE_SHADER_FRAGMENT; +	tgsi_scan_shader(p->pipe.tokens, &p->info); +	return (void *)p; +} + +static void +nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->fragprog = hwcso; +	nv50->dirty |= NV50_NEW_FRAGPROG; +} + +static void +nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	struct nv50_program *p = hwcso; + +	nv50_program_destroy(nv50, p); +	
FREE((void*)p->pipe.tokens); +	FREE(p); +} + +static void +nv50_set_blend_color(struct pipe_context *pipe, +		     const struct pipe_blend_color *bcol) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->blend_colour = *bcol; +	nv50->dirty |= NV50_NEW_BLEND_COLOUR; +} + +static void +nv50_set_clip_state(struct pipe_context *pipe, +		    const struct pipe_clip_state *clip) +{ +} + +static void +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +			 const struct pipe_constant_buffer *buf ) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	if (shader == PIPE_SHADER_VERTEX) { +		nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; +		nv50->dirty |= NV50_NEW_VERTPROG_CB; +	} else +	if (shader == PIPE_SHADER_FRAGMENT) { +		nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; +		nv50->dirty |= NV50_NEW_FRAGPROG_CB; +	} +} + +static void +nv50_set_framebuffer_state(struct pipe_context *pipe, +			   const struct pipe_framebuffer_state *fb) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->framebuffer = *fb; +	nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_set_polygon_stipple(struct pipe_context *pipe, +			 const struct pipe_poly_stipple *stipple) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->stipple = *stipple; +	nv50->dirty |= NV50_NEW_STIPPLE; +} + +static void +nv50_set_scissor_state(struct pipe_context *pipe, +		       const struct pipe_scissor_state *s) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->scissor = *s; +	nv50->dirty |= NV50_NEW_SCISSOR; +} + +static void +nv50_set_viewport_state(struct pipe_context *pipe, +			const struct pipe_viewport_state *vpt) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	nv50->viewport = *vpt; +	nv50->dirty |= NV50_NEW_VIEWPORT; +} + +static void +nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, +			const struct pipe_vertex_buffer *vb) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); +	nv50->vtxbuf_nr = count; + +	nv50->dirty |= NV50_NEW_ARRAYS; +} + +static void +nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count, +			 const struct pipe_vertex_element *ve) +{ +	struct nv50_context *nv50 = nv50_context(pipe); + +	memcpy(nv50->vtxelt, ve, sizeof(*ve) * count); +	nv50->vtxelt_nr = count; + +	nv50->dirty |= NV50_NEW_ARRAYS; +} + +void +nv50_init_state_functions(struct nv50_context *nv50) +{ +	nv50->pipe.create_blend_state = nv50_blend_state_create; +	nv50->pipe.bind_blend_state = nv50_blend_state_bind; +	nv50->pipe.delete_blend_state = nv50_blend_state_delete; + +	nv50->pipe.create_sampler_state = nv50_sampler_state_create; +	nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; +	nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; +	nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; + +	nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; +	nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; +	nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete; + +	nv50->pipe.create_depth_stencil_alpha_state = +		nv50_depth_stencil_alpha_state_create; +	nv50->pipe.bind_depth_stencil_alpha_state = +		nv50_depth_stencil_alpha_state_bind; +	nv50->pipe.delete_depth_stencil_alpha_state = +		nv50_depth_stencil_alpha_state_delete; + +	nv50->pipe.create_vs_state = nv50_vp_state_create; +	nv50->pipe.bind_vs_state = nv50_vp_state_bind; +	nv50->pipe.delete_vs_state = nv50_vp_state_delete; + +	nv50->pipe.create_fs_state = 
nv50_fp_state_create; +	nv50->pipe.bind_fs_state = nv50_fp_state_bind; +	nv50->pipe.delete_fs_state = nv50_fp_state_delete; + +	nv50->pipe.set_blend_color = nv50_set_blend_color; +	nv50->pipe.set_clip_state = nv50_set_clip_state; +	nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; +	nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state; +	nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple; +	nv50->pipe.set_scissor_state = nv50_set_scissor_state; +	nv50->pipe.set_viewport_state = nv50_set_viewport_state; + +	nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; +	nv50->pipe.set_vertex_elements = nv50_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c new file mode 100644 index 0000000000..948112ffa9 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -0,0 +1,313 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nv50_context.h" +#include "nouveau/nouveau_stateobj.h" + +static void +nv50_state_validate_fb(struct nv50_context *nv50) +{ +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nouveau_stateobj *so = so_new(128, 18); +	struct pipe_framebuffer_state *fb = &nv50->framebuffer; +	unsigned i, w, h, gw = 0; + +	for (i = 0; i < fb->nr_cbufs; i++) { +		if (!gw) { +			w = fb->cbufs[i]->width; +			h = fb->cbufs[i]->height; +			gw = 1; +		} else { +			assert(w == fb->cbufs[i]->width); +			assert(h == fb->cbufs[i]->height); +		} + +		so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2); +		so_data  (so, fb->cbufs[i]->width); +		so_data  (so, fb->cbufs[i]->height); + +		so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); +		so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, +			  NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | +			  NOUVEAU_BO_RDWR, 0, 0); +		so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, +			  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | +			  NOUVEAU_BO_RDWR, 0, 0); +		switch (fb->cbufs[i]->format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +			so_data(so, 0xcf); +			break; +		case PIPE_FORMAT_R5G6B5_UNORM: +			so_data(so, 0xe8); +			break; +		default: +			NOUVEAU_ERR("AIIII unknown format %s\n", +				    pf_name(fb->cbufs[i]->format)); +			so_data(so, 0xe6); +			break; +		} +		so_data(so, 0x00000000); +		so_data(so, 0x00000000); + +		so_method(so, tesla, 0x1224, 1); +		so_data  (so, 1); +	} + +	if (fb->zsbuf) { +		if (!gw) { +			w = fb->zsbuf->width; +			h = fb->zsbuf->height; +			gw = 1; +		} else { +			assert(w == fb->zsbuf->width); +			assert(h == fb->zsbuf->height); +		} + +		so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); +		so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, +			  NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | +			  NOUVEAU_BO_RDWR, 0, 0); +		so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, +			  NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | +			  NOUVEAU_BO_RDWR, 0, 0); +		switch (fb->zsbuf->format) { +		case PIPE_FORMAT_Z24S8_UNORM: +			so_data(so, 0x16); +			break; +		case PIPE_FORMAT_Z16_UNORM: +			so_data(so, 0x15); +			break; +		default: +			NOUVEAU_ERR("AIIII unknown format %s\n", +				    pf_name(fb->zsbuf->format)); +			so_data(so, 0x16); +			break; +		} +		so_data(so, 0x00000000); +		so_data(so, 0x00000000); + +		so_method(so, tesla, 0x1538, 1); +		so_data  (so, 1); +		so_method(so, tesla, 0x1228, 3); +		so_data  (so, fb->zsbuf->width); +		so_data  (so, fb->zsbuf->height); +		so_data  (so, 0x00010001); +	} + +	so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); +	so_data  (so, w << 16); +	so_data  (so, h << 16); +	so_method(so, tesla, 0x0e04, 2); +	so_data  (so, w << 16); +	so_data  (so, h << 16); +	so_method(so, tesla, 0xdf8, 2); +	so_data  (so, 0); +	so_data  (so, h); + +	so_ref(so, &nv50->state.fb); +} + +static void +nv50_state_emit(struct nv50_context *nv50) +{ +	struct nv50_screen *screen = nv50->screen; +	struct nouveau_winsys *nvws = screen->nvws; + +	if (nv50->pctx_id != screen->cur_pctx) { +		nv50->state.dirty |= 0xffffffff; +		screen->cur_pctx = nv50->pctx_id; +	} + +	if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) +		so_emit(nvws, nv50->state.fb); +	if (nv50->state.dirty & NV50_NEW_BLEND) +		so_emit(nvws, nv50->state.blend); +	if (nv50->state.dirty & NV50_NEW_ZSA) +		so_emit(nvws, nv50->state.zsa); +	if (nv50->state.dirty & NV50_NEW_VERTPROG) +		so_emit(nvws, nv50->state.vertprog); +	if (nv50->state.dirty & NV50_NEW_FRAGPROG) +		so_emit(nvws, nv50->state.fragprog); +	if (nv50->state.dirty & 
NV50_NEW_RASTERIZER) +		so_emit(nvws, nv50->state.rast); +	if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) +		so_emit(nvws, nv50->state.blend_colour); +	if (nv50->state.dirty & NV50_NEW_STIPPLE) +		so_emit(nvws, nv50->state.stipple); +	if (nv50->state.dirty & NV50_NEW_SCISSOR) +		so_emit(nvws, nv50->state.scissor); +	if (nv50->state.dirty & NV50_NEW_VIEWPORT) +		so_emit(nvws, nv50->state.viewport); +	if (nv50->state.dirty & NV50_NEW_SAMPLER) +		so_emit(nvws, nv50->state.tsc_upload); +	if (nv50->state.dirty & NV50_NEW_TEXTURE) +		so_emit(nvws, nv50->state.tic_upload); +	if (nv50->state.dirty & NV50_NEW_ARRAYS) { +		so_emit(nvws, nv50->state.vtxfmt); +		so_emit(nvws, nv50->state.vtxbuf); +	} +	nv50->state.dirty = 0; + +	so_emit_reloc_markers(nvws, nv50->state.fb); +	so_emit_reloc_markers(nvws, nv50->state.vertprog); +	so_emit_reloc_markers(nvws, nv50->state.fragprog); +	so_emit_reloc_markers(nvws, nv50->state.vtxbuf); +	so_emit_reloc_markers(nvws, nv50->screen->static_init); +} + +boolean +nv50_state_validate(struct nv50_context *nv50) +{ +	const struct pipe_framebuffer_state *fb = &nv50->framebuffer; +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nouveau_stateobj *so; +	unsigned i; + +	for (i = 0; i < fb->nr_cbufs; i++) +		fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + +	if (fb->zsbuf) +		fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + +	if (nv50->dirty & NV50_NEW_FRAMEBUFFER) +		nv50_state_validate_fb(nv50); + +	if (nv50->dirty & NV50_NEW_BLEND) +		so_ref(nv50->blend->so, &nv50->state.blend); + +	if (nv50->dirty & NV50_NEW_ZSA) +		so_ref(nv50->zsa->so, &nv50->state.zsa); + +	if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) +		nv50_vertprog_validate(nv50); + +	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) +		nv50_fragprog_validate(nv50); + +	if (nv50->dirty & NV50_NEW_RASTERIZER) +		so_ref(nv50->rasterizer->so, &nv50->state.rast); + +	if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { +		so = so_new(5, 0); +		so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); +		so_data  (so, fui(nv50->blend_colour.color[0])); +		so_data  (so, fui(nv50->blend_colour.color[1])); +		so_data  (so, fui(nv50->blend_colour.color[2])); +		so_data  (so, fui(nv50->blend_colour.color[3])); +		so_ref(so, &nv50->state.blend_colour); +	} + +	if (nv50->dirty & NV50_NEW_STIPPLE) { +		so = so_new(33, 0); +		so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); +		for (i = 0; i < 32; i++) +			so_data(so, nv50->stipple.stipple[i]); +		so_ref(so, &nv50->state.stipple); +	} + +	if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) { +		struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe; +		struct pipe_scissor_state *s = &nv50->scissor; + +		if (nv50->state.scissor && +		    (rast->scissor == 0 && nv50->state.scissor_enabled == 0)) +			goto scissor_uptodate; +		nv50->state.scissor_enabled = rast->scissor; + +		so = so_new(3, 0); +		so_method(so, tesla, 0x0ff4, 2); +		if (nv50->state.scissor_enabled) { +			so_data(so, ((s->maxx - s->minx) << 16) | s->minx); +			so_data(so, ((s->maxy - s->miny) << 16) | s->miny); +		} else { +			so_data(so, (8192 << 16)); +			so_data(so, (8192 << 16)); +		} +		so_ref(so, &nv50->state.scissor); +		nv50->state.dirty |= NV50_NEW_SCISSOR; +	} +scissor_uptodate: + +	if (nv50->dirty & NV50_NEW_VIEWPORT) { +		unsigned bypass; + +		if (!nv50->rasterizer->pipe.bypass_clipping) +			bypass = 0; +		else +			bypass = 1; + +		if (nv50->state.viewport && +		    (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) && +		    nv50->state.viewport_bypass 
== bypass) +			goto viewport_uptodate; +		nv50->state.viewport_bypass = bypass; + +		so = so_new(12, 0); +		if (!bypass) { +			so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); +			so_data  (so, fui(nv50->viewport.translate[0])); +			so_data  (so, fui(nv50->viewport.translate[1])); +			so_data  (so, fui(nv50->viewport.translate[2])); +			so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); +			so_data  (so, fui(nv50->viewport.scale[0])); +			so_data  (so, fui(-nv50->viewport.scale[1])); +			so_data  (so, fui(nv50->viewport.scale[2])); +			so_method(so, tesla, 0x192c, 1); +			so_data  (so, 1); +			so_method(so, tesla, 0x0f90, 1); +			so_data  (so, 0); +		} else { +			so_method(so, tesla, 0x192c, 1); +			so_data  (so, 0); +			so_method(so, tesla, 0x0f90, 1); +			so_data  (so, 1); +		} + +		so_ref(so, &nv50->state.viewport); +	} +viewport_uptodate: + +	if (nv50->dirty & NV50_NEW_SAMPLER) { +		int i; + +		so = so_new(nv50->sampler_nr * 8 + 3, 0); +		so_method(so, tesla, 0x0f00, 1); +		so_data  (so, NV50_CB_TSC); +		so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); +		for (i = 0; i < nv50->sampler_nr; i++) +			so_datap (so, nv50->sampler[i], 8); +		so_ref(so, &nv50->state.tsc_upload); +	} + +	if (nv50->dirty & NV50_NEW_TEXTURE) +		nv50_tex_validate(nv50); + +	if (nv50->dirty & NV50_NEW_ARRAYS) +		nv50_vbo_validate(nv50); + +	nv50->state.dirty |= nv50->dirty; +	nv50->dirty = 0; +	nv50_state_emit(nv50); + +	return TRUE; +} + diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c new file mode 100644 index 0000000000..f2dd2eb30b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -0,0 +1,230 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define __NOUVEAU_PUSH_H__ +#include <stdint.h> +#include "nouveau/nouveau_pushbuf.h" +#include "nv50_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" + +#include "util/u_tile.h" + +static INLINE int +nv50_format(enum pipe_format format) +{ +	switch (format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case PIPE_FORMAT_Z24S8_UNORM: +		return NV50_2D_DST_FORMAT_32BPP; +	case PIPE_FORMAT_X8R8G8B8_UNORM: +		return NV50_2D_DST_FORMAT_24BPP; +	case PIPE_FORMAT_R5G6B5_UNORM: +		return NV50_2D_DST_FORMAT_16BPP; +	case PIPE_FORMAT_A8_UNORM: +		return NV50_2D_DST_FORMAT_8BPP; +	default: +		return -1; +	} +} + +static int +nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) +{ +	struct nouveau_channel *chan = screen->nvws->channel; +	struct nouveau_grobj *eng2d = screen->eng2d; +	struct nouveau_bo *bo; + 	int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + 	int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); +  +	bo = screen->nvws->get_bo(nv50_miptree(ps->texture)->buffer); +	if (!bo) +		return 1; + + 	format = nv50_format(ps->format); + 	if (format < 0) + 		return 1; +   + 	if (!bo->tiled) { + 		BEGIN_RING(chan, eng2d, mthd, 2); + 		OUT_RING  (chan, format); + 		OUT_RING  (chan, 1); + 		BEGIN_RING(chan, eng2d, mthd + 0x14, 5); + 		OUT_RING  (chan, ps->stride); + 		OUT_RING  (chan, ps->width); + 		OUT_RING  (chan, ps->height); + 		OUT_RELOCh(chan, bo, ps->offset, flags); + 		OUT_RELOCl(chan, bo, ps->offset, flags); + 	} else { + 		BEGIN_RING(chan, eng2d, mthd, 5); + 		OUT_RING  (chan, format); + 		OUT_RING  (chan, 0); + 		OUT_RING  (chan, 0); + 		OUT_RING  (chan, 1); + 		OUT_RING  (chan, 0); + 		BEGIN_RING(chan, eng2d, mthd + 0x18, 4); + 		OUT_RING  (chan, ps->width); + 		OUT_RING  (chan, ps->height); + 		OUT_RELOCh(chan, bo, ps->offset, flags); + 		OUT_RELOCl(chan, bo, ps->offset, flags); + 	} +  +#if 0 + 	if (dst) { + 		BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); + 		OUT_RING  (chan, 0); + 		OUT_RING  (chan, 0); + 		OUT_RING  (chan, surf->width); + 		OUT_RING  (chan, surf->height); + 	} +#endif +   + 	return 0; +} + +int +nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, +		     int dx, int dy, struct pipe_surface *src, int sx, int sy, +		     int w, int h) +{ +	struct nouveau_channel *chan = screen->nvws->channel; +	struct nouveau_grobj *eng2d = screen->eng2d; +	int ret; + +	WAIT_RING (chan, 32); + +	ret = nv50_surface_set(screen, dst, 1); +	if (ret) +		return ret; + +	ret = nv50_surface_set(screen, src, 0); +	if (ret) +		return ret; + +	BEGIN_RING(chan, eng2d, 0x088c, 1); +	OUT_RING  (chan, 0); +	BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4); +	OUT_RING  (chan, dx); +	OUT_RING  (chan, dy); +	OUT_RING  (chan, w); +	OUT_RING  (chan, h); +	BEGIN_RING(chan, eng2d, 0x08c0, 4); +	OUT_RING  (chan, 0); +	OUT_RING  (chan, 1); +	OUT_RING  (chan, 0); +	OUT_RING  (chan, 1); +	BEGIN_RING(chan, eng2d, 0x08d0, 4); +	OUT_RING  (chan, 0); +	OUT_RING  (chan, sx); +	OUT_RING  (chan, 0); +	OUT_RING  (chan, sy); + +	return 0; +} + +static void +nv50_surface_copy(struct pipe_context *pipe, boolean flip, +		  struct pipe_surface *dest, unsigned destx, unsigned desty, +		  struct pipe_surface *src, unsigned srcx, unsigned srcy, +		  unsigned width, unsigned height) +{ +	struct nv50_context *nv50 = (struct nv50_context *)pipe; +	struct nv50_screen *screen = nv50->screen; + +	assert(src->format == dest->format); + +	if (flip) { +		desty += height; +		while (height--) { +		
	nv50_surface_do_copy(screen, dest, destx, desty--, src, +					     srcx, srcy++, width, 1); +		} +	} else { +		nv50_surface_do_copy(screen, dest, destx, desty, src, srcx, +				     srcy, width, height); +	} +} + +static void +nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +		  unsigned destx, unsigned desty, unsigned width, +		  unsigned height, unsigned value) +{ +	struct nv50_context *nv50 = (struct nv50_context *)pipe; +	struct nv50_screen *screen = nv50->screen; +	struct nouveau_channel *chan = screen->nvws->channel; +	struct nouveau_grobj *eng2d = screen->eng2d; +	int format, ret; + +	format = nv50_format(dest->format); +	if (format < 0) +		return; + +	WAIT_RING (chan, 32); + +	ret = nv50_surface_set(screen, dest, 1); +	if (ret) +		return; + +	BEGIN_RING(chan, eng2d, 0x0580, 3); +	OUT_RING  (chan, 4); +	OUT_RING  (chan, format); +	OUT_RING  (chan, value); +	BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); +	OUT_RING  (chan, destx); +	OUT_RING  (chan, desty); +	OUT_RING  (chan, width); +	OUT_RING  (chan, height); +} + +static void * +nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, +		 unsigned flags ) +{ +	struct pipe_winsys *ws = screen->winsys; + +	return ws->buffer_map(ws, nv50_surface_buffer(ps), flags); +} + +static void +nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) +{ +	struct pipe_winsys *ws = pscreen->winsys; + +	ws->buffer_unmap(ws, nv50_surface_buffer(ps)); +} + +void +nv50_init_surface_functions(struct nv50_context *nv50) +{ +	nv50->pipe.surface_copy = nv50_surface_copy; +	nv50->pipe.surface_fill = nv50_surface_fill; +} + +void +nv50_surface_init_screen_functions(struct pipe_screen *pscreen) +{ +	pscreen->surface_map = nv50_surface_map; +	pscreen->surface_unmap = nv50_surface_unmap; +} + diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c new file mode 100644 index 0000000000..675f9b20cb --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -0,0 +1,163 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static int +nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt) +{ +	switch (mt->base.format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_8_8_8_8); +		break; +	case PIPE_FORMAT_A1R5G5B5_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_1_5_5_5); +		break; +	case PIPE_FORMAT_A4R4G4B4_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_4_4_4_4); +		break; +	case PIPE_FORMAT_R5G6B5_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_5_6_5); +		break; +	case PIPE_FORMAT_L8_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_8); +		break; +	case PIPE_FORMAT_A8_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_8); +		break; +	case PIPE_FORMAT_I8_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_8); +		break; +	case PIPE_FORMAT_A8L8_UNORM: +		so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_8_8); +		break; +	case PIPE_FORMAT_DXT1_RGB: +		so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_DXT1); +		break; +	case PIPE_FORMAT_DXT1_RGBA: +		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_DXT1); +		break; +	case PIPE_FORMAT_DXT3_RGBA: +		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    
NV50TIC_0_0_FMT_DXT3); +		break; +	case PIPE_FORMAT_DXT5_RGBA: +		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | +			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | +			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | +			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | +			    NV50TIC_0_0_FMT_DXT5); +		break; +	default: +		return 1; +	} + +	so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | +		     NOUVEAU_BO_RD, 0, 0); +	so_data (so, 0xd0005000); +	so_data (so, 0x00300000); +	so_data (so, mt->base.width[0]); +	so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]); +	so_data (so, 0x03000000); +	so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | +		     NOUVEAU_BO_RD, 0, 0); + +	return 0; +} + +void +nv50_tex_validate(struct nv50_context *nv50) +{ +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nouveau_stateobj *so; +	int unit, level, image; + +	so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2); +	so_method(so, tesla, 0x0f00, 1); +	so_data  (so, NV50_CB_TIC); +	so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); +	for (unit = 0; unit < nv50->miptree_nr; unit++) { +		struct nv50_miptree *mt = nv50->miptree[unit]; + +		for (level = 0; level <= mt->base.last_level; level++) { +			for (image = 0; image < mt->image_nr; image++) { +				nv50_miptree_sync(&nv50->screen->pipe, mt, +						  level, image); +			} +		} + +		if (nv50_tex_construct(so, mt)) { +			NOUVEAU_ERR("failed tex validate\n"); +			so_ref(NULL, &so); +			return; +		} +	} + +	so_ref(so, &nv50->state.tic_upload); +} + diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h new file mode 100644 index 0000000000..aca622c73b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -0,0 +1,129 @@ +#ifndef __NV50_TEXTURE_H__ +#define __NV50_TEXTURE_H__ + +/* It'd be really nice to have these in nouveau_class.h generated by + * renouveau like the rest of the object header - but not sure it can + * handle non-object stuff nicely - need to look into it. 
+ */ + +/* Texture image control block */ +#define NV50TIC_0_0_MAPA_MASK                                     0x38000000 +#define NV50TIC_0_0_MAPA_ZERO                                     0x00000000 +#define NV50TIC_0_0_MAPA_C0                                       0x10000000 +#define NV50TIC_0_0_MAPA_C1                                       0x18000000 +#define NV50TIC_0_0_MAPA_C2                                       0x20000000 +#define NV50TIC_0_0_MAPA_C3                                       0x28000000 +#define NV50TIC_0_0_MAPA_ONE                                      0x38000000 +#define NV50TIC_0_0_MAPR_MASK                                     0x07000000 +#define NV50TIC_0_0_MAPR_ZERO                                     0x00000000 +#define NV50TIC_0_0_MAPR_C0                                       0x02000000 +#define NV50TIC_0_0_MAPR_C1                                       0x03000000 +#define NV50TIC_0_0_MAPR_C2                                       0x04000000 +#define NV50TIC_0_0_MAPR_C3                                       0x05000000 +#define NV50TIC_0_0_MAPR_ONE                                      0x07000000 +#define NV50TIC_0_0_MAPG_MASK                                     0x00e00000 +#define NV50TIC_0_0_MAPG_ZERO                                     0x00000000 +#define NV50TIC_0_0_MAPG_C0                                       0x00400000 +#define NV50TIC_0_0_MAPG_C1                                       0x00600000 +#define NV50TIC_0_0_MAPG_C2                                       0x00800000 +#define NV50TIC_0_0_MAPG_C3                                       0x00a00000 +#define NV50TIC_0_0_MAPG_ONE                                      0x00e00000 +#define NV50TIC_0_0_MAPB_MASK                                     0x001c0000 +#define NV50TIC_0_0_MAPB_ZERO                                     0x00000000 +#define NV50TIC_0_0_MAPB_C0                                       0x00080000 +#define NV50TIC_0_0_MAPB_C1                                       0x000c0000 +#define NV50TIC_0_0_MAPB_C2                                       0x00100000 +#define NV50TIC_0_0_MAPB_C3                                       0x00140000 +#define NV50TIC_0_0_MAPB_ONE                                      0x001c0000 +#define NV50TIC_0_0_TYPEA_MASK                                    0x00038000 +#define NV50TIC_0_0_TYPEA_UNORM                                   0x00010000 +#define NV50TIC_0_0_TYPER_MASK                                    0x00007000 +#define NV50TIC_0_0_TYPER_UNORM                                   0x00002000 +#define NV50TIC_0_0_TYPEG_MASK                                    0x00000e00 +#define NV50TIC_0_0_TYPEG_UNORM                                   0x00000400 +#define NV50TIC_0_0_TYPEB_MASK                                    0x000001c0 +#define NV50TIC_0_0_TYPEB_UNORM                                   0x00000080 +#define NV50TIC_0_0_FMT_MASK                                      0x0000003c +#define NV50TIC_0_0_FMT_8_8_8_8                                   0x00000008 +#define NV50TIC_0_0_FMT_4_4_4_4                                   0x00000012 +#define NV50TIC_0_0_FMT_1_5_5_5                                   0x00000013 +#define NV50TIC_0_0_FMT_5_6_5                                     0x00000015 +#define NV50TIC_0_0_FMT_8_8                                       0x00000018 +#define NV50TIC_0_0_FMT_8                                         0x0000001d +#define NV50TIC_0_0_FMT_DXT1                                      0x00000024 +#define NV50TIC_0_0_FMT_DXT3                                      0x00000025 
+#define NV50TIC_0_0_FMT_DXT5                                      0x00000026 + +#define NV50TIC_0_1_OFFSET_LOW_MASK                               0xffffffff +#define NV50TIC_0_1_OFFSET_LOW_SHIFT                                       0 + +#define NV50TIC_0_2_UNKNOWN_MASK                                  0xffffffff + +#define NV50TIC_0_3_UNKNOWN_MASK                                  0xffffffff + +#define NV50TIC_0_4_WIDTH_MASK                                    0x0000ffff +#define NV50TIC_0_4_WIDTH_SHIFT                                            0 + +#define NV50TIC_0_5_DEPTH_MASK                                    0xffff0000 +#define NV50TIC_0_5_DEPTH_SHIFT                                           16 +#define NV50TIC_0_5_HEIGHT_MASK                                   0x0000ffff +#define NV50TIC_0_5_HEIGHT_SHIFT                                           0 + +#define NV50TIC_0_6_UNKNOWN_MASK                                  0xffffffff + +#define NV50TIC_0_7_OFFSET_HIGH_MASK                              0xffffffff +#define NV50TIC_0_7_OFFSET_HIGH_SHIFT                                      0 + +/* Texture sampler control block */ +#define NV50TSC_1_0_WRAPS_MASK                                   0x00000007 +#define NV50TSC_1_0_WRAPS_REPEAT                                 0x00000000 +#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT                          0x00000001 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE                          0x00000002 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER                        0x00000003 +#define NV50TSC_1_0_WRAPS_CLAMP                                  0x00000004 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE                   0x00000005 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER                 0x00000006 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP                           0x00000007 +#define NV50TSC_1_0_WRAPT_MASK                                   0x00000038 +#define NV50TSC_1_0_WRAPT_REPEAT                                 0x00000000 +#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT                          0x00000008 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE                          0x00000010 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER                        0x00000018 +#define NV50TSC_1_0_WRAPT_CLAMP                                  0x00000020 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE                   0x00000028 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER                 0x00000030 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP                           0x00000038 +#define NV50TSC_1_0_WRAPR_MASK                                   0x000001c0 +#define NV50TSC_1_0_WRAPR_REPEAT                                 0x00000000 +#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT                          0x00000040 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE                          0x00000080 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER                        0x000000c0 +#define NV50TSC_1_0_WRAPR_CLAMP                                  0x00000100 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE                   0x00000140 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER                 0x00000180 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP                           0x000001c0 + +#define NV50TSC_1_1_MAGF_MASK                                    0x00000003 +#define NV50TSC_1_1_MAGF_NEAREST                                 0x00000001 +#define NV50TSC_1_1_MAGF_LINEAR                                  0x00000002 +#define NV50TSC_1_1_MINF_MASK                                    0x00000030 +#define 
NV50TSC_1_1_MINF_NEAREST                                 0x00000010 +#define NV50TSC_1_1_MINF_LINEAR                                  0x00000020 +#define NV50TSC_1_1_MIPF_MASK                                    0x000000c0 +#define NV50TSC_1_1_MIPF_NONE                                    0x00000040 +#define NV50TSC_1_1_MIPF_NEAREST                                 0x00000080 +#define NV50TSC_1_1_MIPF_LINEAR                                  0x000000c0 + +#define NV50TSC_1_2_UNKNOWN_MASK                                 0xffffffff + +#define NV50TSC_1_3_UNKNOWN_MASK                                 0xffffffff + +#define NV50TSC_1_4_UNKNOWN_MASK                                 0xffffffff + +#define NV50TSC_1_5_UNKNOWN_MASK                                 0xffffffff + +#define NV50TSC_1_6_UNKNOWN_MASK                                 0xffffffff + +#define NV50TSC_1_7_UNKNOWN_MASK                                 0xffffffff + +#endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c new file mode 100644 index 0000000000..0c970adb03 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -0,0 +1,254 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +static INLINE unsigned +nv50_prim(unsigned mode) +{ +	switch (mode) { +	case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; +	case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; +	case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; +	case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; +	case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; +	case PIPE_PRIM_TRIANGLE_STRIP: +		return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; +	case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; +	case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; +	case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; +	case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; +	default: +		break; +	} + +	NOUVEAU_ERR("invalid primitive type %d\n", mode); +	return NV50TCL_VERTEX_BEGIN_POINTS; +} + +boolean +nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, +		 unsigned count) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; + +	nv50_state_validate(nv50); + +	BEGIN_RING(chan, tesla, 0x142c, 1); +	OUT_RING  (chan, 0); +	BEGIN_RING(chan, tesla, 0x142c, 1); +	OUT_RING  (chan, 0); +	BEGIN_RING(chan, tesla, 0x1440, 1); +	OUT_RING  (chan, 0); +	BEGIN_RING(chan, tesla, 0x1334, 1); +	OUT_RING  (chan, 0); + +	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); +	OUT_RING  (chan, nv50_prim(mode)); +	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); +	OUT_RING  (chan, start); +	OUT_RING  (chan, count); +	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); +	OUT_RING  (chan, 0); + +	pipe->flush(pipe, 0, NULL); +	return TRUE; +} + +static INLINE void +nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, +			      unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; + +	map += start; + +	if (count & 1) { +		BEGIN_RING(chan, tesla, 0x15e8, 1); +		OUT_RING  (chan, map[0]); +		map++; +		count--; +	} + +	while (count) { +		unsigned nr = count > 2046 ? 2046 : count; +		int i; + +		BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); +		for (i = 0; i < nr; i += 2) +			OUT_RING  (chan, (map[1] << 16) | map[0]); + +		count -= nr; +		map += nr; +	} +} + +static INLINE void +nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, +			      unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; + +	map += start; + +	if (count & 1) { +		BEGIN_RING(chan, tesla, 0x15e8, 1); +		OUT_RING  (chan, map[0]); +		map++; +		count--; +	} + +	while (count) { +		unsigned nr = count > 2046 ? 2046 : count; +		int i; + +		BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); +		for (i = 0; i < nr; i += 2) +			OUT_RING  (chan, (map[1] << 16) | map[0]); + +		count -= nr; +		map += nr; +	} +} + +static INLINE void +nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map, +			      unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; + +	map += start; + +	while (count) { +		unsigned nr = count > 2047 ? 
2047 : count; + +		BEGIN_RING(chan, tesla, 0x400015e8, nr); +		OUT_RINGp (chan, map, nr); + +		count -= nr; +		map += nr; +	} +} + +boolean +nv50_draw_elements(struct pipe_context *pipe, +		   struct pipe_buffer *indexBuffer, unsigned indexSize, +		   unsigned mode, unsigned start, unsigned count) +{ +	struct nv50_context *nv50 = nv50_context(pipe); +	struct nouveau_channel *chan = nv50->screen->nvws->channel; +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct pipe_winsys *ws = pipe->winsys; +	void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + +	nv50_state_validate(nv50); + +	BEGIN_RING(chan, tesla, 0x142c, 1); +	OUT_RING  (chan, 0); +	BEGIN_RING(chan, tesla, 0x142c, 1); +	OUT_RING  (chan, 0); + +	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); +	OUT_RING  (chan, nv50_prim(mode)); +	switch (indexSize) { +	case 1: +		nv50_draw_elements_inline_u08(nv50, map, start, count); +		break; +	case 2: +		nv50_draw_elements_inline_u16(nv50, map, start, count); +		break; +	case 4: +		nv50_draw_elements_inline_u32(nv50, map, start, count); +		break; +	default: +		assert(0); +	} +	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); +	OUT_RING  (chan, 0); + +	pipe->flush(pipe, 0, NULL); +	return TRUE; +} + +void +nv50_vbo_validate(struct nv50_context *nv50) +{ +	struct nouveau_grobj *tesla = nv50->screen->tesla; +	struct nouveau_stateobj *vtxbuf, *vtxfmt; +	int i, vpi = 0; + +	vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2); +	vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); +	so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr); + +	for (i = 0; i < nv50->vtxelt_nr; i++) { +		struct pipe_vertex_element *ve = &nv50->vtxelt[i]; +		struct pipe_vertex_buffer *vb = +			&nv50->vtxbuf[ve->vertex_buffer_index]; + +		switch (ve->src_format) { +		case PIPE_FORMAT_R32G32B32A32_FLOAT: +			so_data(vtxfmt, 0x7e080000 | i); +			break; +		case PIPE_FORMAT_R32G32B32_FLOAT: +			so_data(vtxfmt, 0x7e100000 | i); +			break; +		case PIPE_FORMAT_R32G32_FLOAT: +			so_data(vtxfmt, 0x7e200000 | i); +			break; +		case PIPE_FORMAT_R32_FLOAT: +			so_data(vtxfmt, 0x7e900000 | i); +			break; +		case PIPE_FORMAT_R8G8B8A8_UNORM: +			so_data(vtxfmt, 0x24500000 | i); +			break; +		default: +		{ +			NOUVEAU_ERR("invalid vbo format %s\n", +				    pf_name(ve->src_format)); +			assert(0); +			return; +		} +		} + +		so_method(vtxbuf, tesla, 0x900 + (i * 16), 3); +		so_data  (vtxbuf, 0x20000000 | vb->stride); +		so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + +			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | +			  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); +		so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + +			  ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | +			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); +	} + +	so_ref (vtxfmt, &nv50->state.vtxfmt); +	so_ref (vtxbuf, &nv50->state.vtxbuf); +} + diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile new file mode 100644 index 0000000000..e83d943cd8 --- /dev/null +++ b/src/gallium/drivers/r300/Makefile @@ -0,0 +1,21 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = r300 + +C_SOURCES = \ +	r300_chipset.c \ +	r300_clear.c \ +	r300_context.c \ +	r300_emit.c \ +	r300_flush.c \ +	r300_screen.c \ +	r300_state.c \ +	r300_state_shader.c \ +	r300_surface.c \ +	r300_swtcl_emit.c \ +	r300_texture.c + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript new file mode 100644 index 0000000000..18684c3e7f --- /dev/null +++ b/src/gallium/drivers/r300/SConscript @@ -0,0 +1,17 @@ +Import('*') + +env = env.Clone() + +r300 = env.ConvenienceLibrary( +	target = 'r300', +	source = [ +		'r300_blit.c', +		'r300_clear.c', +		'r300_context.c', +		'r300_screen.c', +		'r300_state.c', +		'r300_surface.c', +	]) + +Export('r300') + diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c new file mode 100644 index 0000000000..7def62422a --- /dev/null +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -0,0 +1,348 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_chipset.h" +#include "pipe/p_debug.h" + +/* r300_chipset: A file all to itself for deducing the various properties of + * Radeons. */ + +/* Parse a PCI ID and fill an r300_capabilities struct with information. */ +void r300_parse_chipset(struct r300_capabilities* caps) +{ +    /* Reasonable defaults */ +    caps->has_tcl = TRUE; +    caps->is_r500 = FALSE; +    caps->num_vert_fpus = 4; + + +    /* Note: These are not ordered by PCI ID. I leave that task to GCC, +     * which will perform the ordering while collating jump tables. Instead, +     * I've tried to group them according to capabilities and age. 
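+     * As a concrete example (taken from the cases below): PCI ID 0x7100 is
+     * matched to CHIP_FAMILY_R520, with eight vertex FPUs and is_r500 set.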
*/ +    switch (caps->pci_id) { +        case 0x4144: +            caps->family = CHIP_FAMILY_R300; +            break; + +        case 0x4145: +        case 0x4146: +        case 0x4147: +        case 0x4E44: +        case 0x4E45: +        case 0x4E46: +        case 0x4E47: +            caps->family = CHIP_FAMILY_R300; +            break; + +        case 0x4150: +        case 0x4151: +        case 0x4152: +        case 0x4153: +        case 0x4154: +        case 0x4155: +        case 0x4156: +        case 0x4E50: +        case 0x4E51: +        case 0x4E52: +        case 0x4E53: +        case 0x4E54: +        case 0x4E56: +            caps->family = CHIP_FAMILY_RV350; +            break; + +        case 0x4148: +        case 0x4149: +        case 0x414A: +        case 0x414B: +        case 0x4E48: +        case 0x4E49: +        case 0x4E4B: +            caps->family = CHIP_FAMILY_R350; +            break; + +        case 0x4E4A: +            caps->family = CHIP_FAMILY_R360; +            break; + +        case 0x5460: +        case 0x5462: +        case 0x5464: +        case 0x5B60: +        case 0x5B62: +        case 0x5B63: +        case 0x5B64: +        case 0x5B65: +            caps->family = CHIP_FAMILY_RV370; +            break; + +        case 0x3150: +        case 0x3152: +        case 0x3154: +        case 0x3E50: +        case 0x3E54: +            caps->family = CHIP_FAMILY_RV380; +            break; + +        case 0x4A48: +        case 0x4A49: +        case 0x4A4A: +        case 0x4A4B: +        case 0x4A4C: +        case 0x4A4D: +        case 0x4A4E: +        case 0x4A4F: +        case 0x4A50: +        case 0x4A54: +            caps->family = CHIP_FAMILY_R420; +            caps->num_vert_fpus = 6; +            break; + +        case 0x5548: +        case 0x5549: +        case 0x554A: +        case 0x554B: +        case 0x5550: +        case 0x5551: +        case 0x5552: +        case 0x5554: +        case 0x5D57: +            caps->family = CHIP_FAMILY_R423; +            caps->num_vert_fpus = 6; +            break; + +        case 0x554C: +        case 0x554D: +        case 0x554E: +        case 0x554F: +        case 0x5D48: +        case 0x5D49: +        case 0x5D4A: +            caps->family = CHIP_FAMILY_R430; +            caps->num_vert_fpus = 6; +            break; + +        case 0x5D4C: +        case 0x5D4D: +        case 0x5D4E: +        case 0x5D4F: +        case 0x5D50: +        case 0x5D52: +            caps->family = CHIP_FAMILY_R480; +            caps->num_vert_fpus = 6; +            break; + +        case 0x4B49: +        case 0x4B4A: +        case 0x4B4B: +        case 0x4B4C: +            caps->family = CHIP_FAMILY_R481; +            caps->num_vert_fpus = 6; +            break; + +        case 0x5E4C: +        case 0x5E4F: +        case 0x564A: +        case 0x564B: +        case 0x564F: +        case 0x5652: +        case 0x5653: +        case 0x5657: +        case 0x5E48: +        case 0x5E4A: +        case 0x5E4B: +        case 0x5E4D: +            caps->family = CHIP_FAMILY_RV410; +            caps->num_vert_fpus = 6; +            break; + +        case 0x5954: +        case 0x5955: +            caps->family = CHIP_FAMILY_RS480; +            caps->has_tcl = FALSE; +            break; + +        case 0x5974: +        case 0x5975: +            caps->family = CHIP_FAMILY_RS482; +            caps->has_tcl = FALSE; +            break; + +        case 0x5A41: +        case 0x5A42: +            caps->family = CHIP_FAMILY_RS400; +            caps->has_tcl = FALSE; + 
           break; + +        case 0x5A61: +        case 0x5A62: +            caps->family = CHIP_FAMILY_RC410; +            caps->has_tcl = FALSE; +            break; + +        case 0x791E: +        case 0x791F: +            caps->family = CHIP_FAMILY_RS690; +            caps->has_tcl = FALSE; +            break; + +        case 0x796C: +        case 0x796D: +        case 0x796E: +        case 0x796F: +            caps->family = CHIP_FAMILY_RS740; +            caps->has_tcl = FALSE; +            break; + +        case 0x7100: +        case 0x7101: +        case 0x7102: +        case 0x7103: +        case 0x7104: +        case 0x7105: +        case 0x7106: +        case 0x7108: +        case 0x7109: +        case 0x710A: +        case 0x710B: +        case 0x710C: +        case 0x710E: +        case 0x710F: +            caps->family = CHIP_FAMILY_R520; +            caps->num_vert_fpus = 8; +            caps->is_r500 = TRUE; +            break; + +        case 0x7140: +        case 0x7141: +        case 0x7142: +        case 0x7143: +        case 0x7144: +        case 0x7145: +        case 0x7146: +        case 0x7147: +        case 0x7149: +        case 0x714A: +        case 0x714B: +        case 0x714C: +        case 0x714D: +        case 0x714E: +        case 0x714F: +        case 0x7151: +        case 0x7152: +        case 0x7153: +        case 0x715E: +        case 0x715F: +        case 0x7180: +        case 0x7181: +        case 0x7183: +        case 0x7186: +        case 0x7187: +        case 0x7188: +        case 0x718A: +        case 0x718B: +        case 0x718C: +        case 0x718D: +        case 0x718F: +        case 0x7193: +        case 0x7196: +        case 0x719B: +        case 0x719F: +        case 0x7200: +        case 0x7210: +        case 0x7211: +            caps->family = CHIP_FAMILY_RV515; +            caps->num_vert_fpus = 2; +            caps->is_r500 = TRUE; +            break; + +        case 0x71C0: +        case 0x71C1: +        case 0x71C2: +        case 0x71C3: +        case 0x71C4: +        case 0x71C5: +        case 0x71C6: +        case 0x71C7: +        case 0x71CD: +        case 0x71CE: +        case 0x71D2: +        case 0x71D4: +        case 0x71D5: +        case 0x71D6: +        case 0x71DA: +        case 0x71DE: +            caps->family = CHIP_FAMILY_RV530; +            caps->num_vert_fpus = 5; +            caps->is_r500 = TRUE; +            break; + +        case 0x7240: +        case 0x7243: +        case 0x7244: +        case 0x7245: +        case 0x7246: +        case 0x7247: +        case 0x7248: +        case 0x7249: +        case 0x724A: +        case 0x724B: +        case 0x724C: +        case 0x724D: +        case 0x724E: +        case 0x724F: +        case 0x7284: +            caps->family = CHIP_FAMILY_R580; +            caps->num_vert_fpus = 8; +            caps->is_r500 = TRUE; +            break; + +        case 0x7280: +            caps->family = CHIP_FAMILY_RV570; +            caps->num_vert_fpus = 5; +            caps->is_r500 = TRUE; +            break; + +        case 0x7281: +        case 0x7283: +        case 0x7287: +        case 0x7288: +        case 0x7289: +        case 0x728B: +        case 0x728C: +        case 0x7290: +        case 0x7291: +        case 0x7293: +        case 0x7297: +            caps->family = CHIP_FAMILY_RV560; +            caps->num_vert_fpus = 5; +            caps->is_r500 = TRUE; +            break; + +        default: +            debug_printf("r300: Warning: Unknown chipset 0x%x\n", +                
caps->pci_id); +            break; +    } + +    /* Force off TCL for now */ +    caps->has_tcl = FALSE; +} diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h new file mode 100644 index 0000000000..a9cd372ec5 --- /dev/null +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -0,0 +1,79 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_CHIPSET_H +#define R300_CHIPSET_H + +#include "pipe/p_compiler.h" + +/* Structure containing all the possible information about a specific Radeon + * in the R3xx, R4xx, and R5xx families. */ +struct r300_capabilities { +    /* PCI ID */ +    uint32_t pci_id; +    /* Chipset family */ +    int family; +    /* The number of vertex floating-point units */ +    int num_vert_fpus; +    /* The number of fragment pipes */ +    int num_frag_pipes; +    /* Whether or not TCL is physically present */ +    boolean has_tcl; +    /* Whether or not this is an RV515 or newer; R500s have many differences +     * that require extra consideration, compared to their R3xx cousins: +     * - Extra bit of width and height on texture sizes +     * - Blend color is split across two registers +     * - Universal Shader (US) block used for fragment shaders */ +    boolean is_r500; +}; + +/* Enumerations for legibility and telling which card we're running on. 
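+ * The values are grouped rather than strictly ordered by release: discrete
+ * R3xx parts first, then R4xx, then the IGP derivatives, then the R5xx line.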
*/ +enum { +    CHIP_FAMILY_R300 = 0, +    CHIP_FAMILY_R350, +    CHIP_FAMILY_R360, +    CHIP_FAMILY_RV350, +    CHIP_FAMILY_RV370, +    CHIP_FAMILY_RV380, +    CHIP_FAMILY_R420, +    CHIP_FAMILY_R423, +    CHIP_FAMILY_R430, +    CHIP_FAMILY_R480, +    CHIP_FAMILY_R481, +    CHIP_FAMILY_RV410, +    CHIP_FAMILY_RS400, +    CHIP_FAMILY_RC410, +    CHIP_FAMILY_RS480, +    CHIP_FAMILY_RS482, +    CHIP_FAMILY_RS690, +    CHIP_FAMILY_RS740, +    CHIP_FAMILY_RV515, +    CHIP_FAMILY_R520, +    CHIP_FAMILY_RV530, +    CHIP_FAMILY_R580, +    CHIP_FAMILY_RV560, +    CHIP_FAMILY_RV570 +}; + +void r300_parse_chipset(struct r300_capabilities* caps); + +#endif /* R300_CHIPSET_H */ diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c new file mode 100644 index 0000000000..fd28437aaa --- /dev/null +++ b/src/gallium/drivers/r300/r300_clear.c @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_clear.h" + +/* This gets its own file because Intel's is in its own file. + * I assume there's a good reason. */ +void r300_clear(struct pipe_context* pipe, +                struct pipe_surface* ps, +                unsigned color) +{ +    pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); +    ps->status = PIPE_SURFACE_STATUS_DEFINED; +}
\ No newline at end of file diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h new file mode 100644 index 0000000000..e24a0690c9 --- /dev/null +++ b/src/gallium/drivers/r300/r300_clear.h @@ -0,0 +1,27 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "pipe/p_context.h" + +void r300_clear(struct pipe_context* pipe, +                struct pipe_surface* ps, +                unsigned color); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c new file mode 100644 index 0000000000..7b605ae87a --- /dev/null +++ b/src/gallium/drivers/r300/r300_context.c @@ -0,0 +1,68 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "r300_context.h" + +static void r300_destroy_context(struct pipe_context* context) { +    struct r300_context* r300 = r300_context(context); + +    draw_destroy(r300->draw); + +    FREE(r300->blend_color_state); +    FREE(r300->scissor_state); +    FREE(r300); +} + +struct pipe_context* r300_create_context(struct pipe_screen* screen, +                                         struct pipe_winsys* winsys, +                                         struct r300_winsys* r300_winsys) +{ +    struct r300_context* r300 = CALLOC_STRUCT(r300_context); + +    if (!r300) +        return NULL; + +    r300->winsys = r300_winsys; +    r300->context.winsys = winsys; +    r300->context.screen = r300_create_screen(winsys, r300_winsys); + +    r300->context.destroy = r300_destroy_context; + +    r300->context.clear = r300_clear; + +    r300->draw = draw_create(); +    /*XXX draw_set_rasterize_stage(r300->draw, r300_draw_swtcl_stage(r300));*/ + +    r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); +    r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + +    r300_init_flush_functions(r300); + +    r300_init_surface_functions(r300); + +    r300_init_state_functions(r300); + +    r300->dirty_state = R300_NEW_KITCHEN_SINK; +    r300->dirty_hw++; + +    return &r300->context; +} diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h new file mode 100644 index 0000000000..376c57639d --- /dev/null +++ b/src/gallium/drivers/r300/r300_context.h @@ -0,0 +1,190 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef R300_CONTEXT_H +#define R300_CONTEXT_H + +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_memory.h" + +#include "r300_clear.h" +#include "r300_screen.h" +#include "r300_winsys.h" + +struct r300_blend_state { +    uint32_t blend_control;       /* R300_RB3D_CBLEND: 0x4e04 */ +    uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ +    uint32_t rop;                 /* R300_RB3D_ROPCNTL: 0x4e18 */ +    uint32_t dither;              /* R300_RB3D_DITHER_CTL: 0x4e50 */ +}; + +struct r300_blend_color_state { +    /* RV515 and earlier */ +    uint32_t blend_color;            /* R300_RB3D_BLEND_COLOR: 0x4e10 */ +    /* R520 and newer */ +    uint32_t blend_color_red_alpha;  /* R500_RB3D_CONSTANT_COLOR_AR: 0x4ef8 */ +    uint32_t blend_color_green_blue; /* R500_RB3D_CONSTANT_COLOR_GB: 0x4efc */ +}; + +struct r300_dsa_state { +    uint32_t alpha_function;    /* R300_FG_ALPHA_FUNC: 0x4bd4 */ +    uint32_t alpha_reference;   /* R500_FG_ALPHA_VALUE: 0x4be0 */ +    uint32_t z_buffer_control;  /* R300_ZB_CNTL: 0x4f00 */ +    uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ +    uint32_t stencil_ref_mask;  /* R300_ZB_STENCILREFMASK: 0x4f08 */ +    uint32_t z_buffer_top;      /* R300_ZB_ZTOP: 0x4f14 */ +    uint32_t stencil_ref_bf;    /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ +}; + +struct r300_rs_state { +    uint32_t vap_control_status;    /* R300_VAP_CNTL_STATUS: 0x2140 */ +    uint32_t point_size;            /* R300_GA_POINT_SIZE: 0x421c */ +    uint32_t line_control;          /* R300_GA_LINE_CNTL: 0x4234 */ +    uint32_t depth_scale_front;  /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ +    uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ +    uint32_t depth_scale_back;    /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ +    uint32_t depth_offset_back;  /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ +    uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ +    uint32_t cull_mode;             /* R300_SU_CULL_MODE: 0x42b8 */ +    uint32_t line_stipple_config;   /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ +    uint32_t line_stipple_value;    /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ +}; + +struct r300_sampler_state { +    uint32_t filter0;      /* R300_TX_FILTER0: 0x4400 */ +    uint32_t filter1;      /* R300_TX_FILTER1: 0x4440 */ +    uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ +}; + +struct r300_scissor_state { +    uint32_t scissor_top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */ +    uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ +}; + +struct r300_texture_state { +}; + +#define R300_NEW_BLEND           0x000001 +#define R300_NEW_BLEND_COLOR     0x000002 +#define R300_NEW_DSA             0x000004 +#define R300_NEW_FRAMEBUFFERS    0x000008 +#define R300_NEW_FRAGMENT_SHADER 0x000010 +#define R300_NEW_RASTERIZER      0x000020 +#define R300_NEW_SAMPLER         0x000040 +#define R300_NEW_SCISSOR         0x004000 +#define R300_NEW_TEXTURE         0x008000 +#define R300_NEW_VERTEX_SHADER   0x800000 +#define R300_NEW_KITCHEN_SINK    0xffffff + +/* The next several objects are not pure Radeon state; they inherit from + * various Gallium classes. */ + +struct r3xx_fragment_shader { +    /* Parent class */ +    struct pipe_shader_state state; +    struct tgsi_shader_info info; + +    /* Has this shader been translated yet? 
*/ +    boolean translated; +}; + +struct r300_fragment_shader { +    /* Parent class */ +    struct r3xx_fragment_shader shader; +}; + +struct r500_fragment_shader { +    /* Parent class */ +    struct r3xx_fragment_shader shader; +}; + +struct r300_texture { +    /* Parent class */ +    struct pipe_texture tex; + +    /* Offsets into the buffer. */ +    unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + +    /* Total size of this texture, in bytes. */ +    unsigned size; + +    /* Pipe buffer backing this texture. */ +    struct pipe_buffer* buffer; +}; + +struct r300_context { +    /* Parent class */ +    struct pipe_context context; + +    /* The interface to the windowing system, etc. */ +    struct r300_winsys* winsys; +    /* Draw module. Used mostly for SW TCL. */ +    struct draw_context* draw; + +    /* Various CSO state objects. */ +    /* Blend state. */ +    struct r300_blend_state* blend_state; +    /* Blend color state. */ +    struct r300_blend_color_state* blend_color_state; +    /* Depth, stencil, and alpha state. */ +    struct r300_dsa_state* dsa_state; +    /* Fragment shader. */ +    struct r3xx_fragment_shader* fs; +    /* Framebuffer state. We currently don't need our own version of this. */ +    struct pipe_framebuffer_state framebuffer_state; +    /* Rasterizer state. */ +    struct r300_rs_state* rs_state; +    /* Sampler states. */ +    struct r300_sampler_state* sampler_states[8]; +    int sampler_count; +    /* Scissor state. */ +    struct r300_scissor_state* scissor_state; +    /* Texture states. */ +    struct r300_texture* textures[8]; +    struct r300_texture_state* texture_states[8]; +    int texture_count; +    /* Bitmask of dirty state objects. */ +    uint32_t dirty_state; +    /* Flag indicating whether or not the HW is dirty. */ +    uint32_t dirty_hw; +}; + +/* Convenience cast wrapper. */ +static struct r300_context* r300_context(struct pipe_context* context) { +    return (struct r300_context*)context; +} + +/* Context initialization. */ +void r300_init_state_functions(struct r300_context* r300); +void r300_init_surface_functions(struct r300_context* r300); + +/* Fun with includes: r300_winsys also declares this prototype. + * We'll just step out in that case... */ +#ifndef R300_WINSYS_H +struct pipe_context* r300_create_context(struct pipe_screen* screen, +                                         struct pipe_winsys* winsys, +                                         struct r300_winsys* r300_winsys); +#endif + +#endif /* R300_CONTEXT_H */ diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h new file mode 100644 index 0000000000..385b61a096 --- /dev/null +++ b/src/gallium/drivers/r300/r300_cs.h @@ -0,0 +1,129 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CS_H
+#define R300_CS_H
+
+#include "r300_reg.h"
+#include "r300_winsys.h"
+
+/* Pack a 32-bit float into a dword. */
+static uint32_t pack_float_32(float f)
+{
+    union {
+        float f;
+        uint32_t u;
+    } u;
+
+    u.f = f;
+    return u.u;
+}
+
+/* Yes, I know macros are ugly. However, they are much prettier than the code
+ * that they neatly hide away, and don't have the cost of function setup, so
+ * we're going to use them. */
+
+#define MAX_CS_SIZE 64 * 1024 / 4
+
+/* XXX stolen from radeon_drm.h */
+#define RADEON_GEM_DOMAIN_CPU  0x1
+#define RADEON_GEM_DOMAIN_GTT  0x2
+#define RADEON_GEM_DOMAIN_VRAM 0x4
+
+/* XXX stolen from radeon_reg.h */
+#define RADEON_CP_PACKET0 0x0
+
+#define CP_PACKET0(register, count) \
+    (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))
+
+#define CP_PACKET3(op, count) \
+    (RADEON_CP_PACKET3 | (op) | ((count) << 16))
+
+#define CS_LOCALS(context) \
+    struct r300_winsys* cs_winsys = context->winsys; \
+    struct radeon_cs* cs = cs_winsys->cs; \
+    int cs_count = 0;
+
+#define CHECK_CS(size) \
+    cs_winsys->check_cs(cs, (size))
+
+#define BEGIN_CS(size) do { \
+    CHECK_CS(size); \
+    debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
+        size, __FUNCTION__, __FILE__, __LINE__); \
+    cs_winsys->begin_cs(cs, (size), __FILE__, __FUNCTION__, __LINE__); \
+    cs_count = size; \
+} while (0)
+
+#define OUT_CS(value) do { \
+    cs_winsys->write_cs_dword(cs, (value)); \
+    cs_count--; \
+} while (0)
+
+#define OUT_CS_32F(value) do { \
+    cs_winsys->write_cs_dword(cs, pack_float_32(value)); \
+    cs_count--; \
+} while (0)
+
+#define OUT_CS_REG(register, value) do { \
+    debug_printf("r300: writing 0x%08X to register 0x%04X\n", \
+        value, register); \
+    assert(register); \
+    OUT_CS(CP_PACKET0(register, 0)); \
+    OUT_CS(value); \
+} while (0)
+
+/* Note: This expects count to be the number of registers,
+ * not the actual packet0 count!
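+ * As an illustrative example (based on the macro below and its use in
+ * r300_emit.c): OUT_CS_REG_SEQ(R300_ZB_CNTL, 3) emits
+ * CP_PACKET0(R300_ZB_CNTL, 2) and is then followed by three OUT_CS dwords.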
*/ +#define OUT_CS_REG_SEQ(register, count) do { \ +    debug_printf("r300: writing register sequence of %d to 0x%04X\n", \ +        count, register); \ +    assert(register); \ +    OUT_CS(CP_PACKET0(register, ((count) - 1))); \ +} while (0) + +#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ +    debug_printf("r300: writing relocation for buffer %p, offset %d\n", \ +        bo, offset); \ +    assert(bo); \ +    OUT_CS(offset); \ +    cs_winsys->write_cs_reloc(cs, bo, rd, wd, flags); \ +    cs_count -= 2; \ +} while (0) + +#define END_CS do { \ +    debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, __FILE__, \ +        __LINE__); \ +    if (cs_count != 0) \ +        debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \ +    cs_winsys->end_cs(cs, __FILE__, __FUNCTION__, __LINE__); \ +} while (0) + +#define FLUSH_CS do { \ +    debug_printf("r300: FLUSH_CS in %s (%s:%d)\n", __FUNCTION__, __FILE__, \ +        __LINE__); \ +    cs_winsys->flush_cs(cs); \ +} while (0) + +#include "r300_cs_inlines.h" + +#endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h new file mode 100644 index 0000000000..71e6623699 --- /dev/null +++ b/src/gallium/drivers/r300/r300_cs_inlines.h @@ -0,0 +1,37 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* r300_cs_inlines: This is just a handful of useful inlines for sending + * (very) common instructions to the CS buffer. Should only be included from + * r300_cs.h, probably. 
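+ * R300_PACIFY below, for instance, writes R300_SC_SCREENDOOR to 0, emits a
+ * RADEON_WAIT_UNTIL, and then restores the screendoor; it appears to serve
+ * as a coarse wait-for-idle between state emissions.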
*/ + +#ifdef R300_CS_H + +#define R300_PACIFY do { \ +    OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \ +    OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 15) | (1 << 17) | \ +        (1 << 18) | (1 << 31)); \ +    OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \ +} while (0) + + +#endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c new file mode 100644 index 0000000000..585a9e729d --- /dev/null +++ b/src/gallium/drivers/r300/r300_emit.c @@ -0,0 +1,155 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* r300_emit: Functions for emitting state. */ + +#include "r300_emit.h" + +void r300_emit_blend_state(struct r300_context* r300, +                           struct r300_blend_state* blend) +{ +    CS_LOCALS(r300); +    BEGIN_CS(7); +    OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2); +    OUT_CS(blend->blend_control); +    OUT_CS(blend->alpha_blend_control); +    OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); +    OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); +    END_CS; +} + +void r300_emit_blend_color_state(struct r300_context* r300, +                                 struct r300_blend_color_state* bc) +{ +    struct r300_screen* r300screen = +        (struct r300_screen*)r300->context.screen; +    CS_LOCALS(r300); +    if (r300screen->caps->is_r500) { +        BEGIN_CS(3); +        OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); +        OUT_CS(bc->blend_color_red_alpha); +        OUT_CS(bc->blend_color_green_blue); +        END_CS; +    } else { +        BEGIN_CS(2); +        OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color); +        END_CS; +    } +} + +void r300_emit_dsa_state(struct r300_context* r300, +                           struct r300_dsa_state* dsa) +{ +    struct r300_screen* r300screen = +        (struct r300_screen*)r300->context.screen; +    CS_LOCALS(r300); +    BEGIN_CS(r300screen->caps->is_r500 ? 
8 : 8); +    OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); +    /* XXX figure out the r300 counterpart for this */ +    if (r300screen->caps->is_r500) { +        /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */ +    } +    OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); +    OUT_CS(dsa->z_buffer_control); +    OUT_CS(dsa->z_stencil_control); +    OUT_CS(dsa->stencil_ref_mask); +    OUT_CS_REG(R300_ZB_ZTOP, dsa->z_buffer_top); +    if (r300screen->caps->is_r500) { +        /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */ +    } +    END_CS; +} + +/* XXX add pitch, stride, z/stencil buf */ +void r300_emit_fb_state(struct r300_context* r300, +                        struct pipe_framebuffer_state* fb) +{ +    CS_LOCALS(r300); +    struct r300_texture* tex; +    int i; + +    BEGIN_CS((3 * fb->nr_cbufs) + 6); +    for (i = 0; i < fb->nr_cbufs; i++) { +        tex = (struct r300_texture*)fb->cbufs[i]->texture; +        OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); +        OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); +    } +    R300_PACIFY; +    END_CS; +} + +void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) +{ +    struct r300_screen* r300screen = +        (struct r300_screen*)r300->context.screen; +    CS_LOCALS(r300); +    BEGIN_CS(14); +    OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); +    OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6); +    OUT_CS(rs->depth_scale_front); +    OUT_CS(rs->depth_offset_front); +    OUT_CS(rs->depth_scale_back); +    OUT_CS(rs->depth_offset_back); +    OUT_CS(rs->polygon_offset_enable); +    OUT_CS(rs->cull_mode); +    OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); +    OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); +    END_CS; +} + +static void r300_emit_dirty_state(struct r300_context* r300) +{ +    struct r300_screen* r300screen = +        (struct r300_screen*)r300->context.screen; +    CS_LOCALS(r300); + +    if (!(r300->dirty_state) && !(r300->dirty_hw)) { +        return; +    } + +    /* XXX check size */ + +    if (r300->dirty_state & R300_NEW_BLEND) { +        r300_emit_blend_state(r300, r300->blend_state); +    } + +    if (r300->dirty_state & R300_NEW_BLEND_COLOR) { +        r300_emit_blend_color_state(r300, r300->blend_color_state); +    } + +    if (r300->dirty_state & R300_NEW_DSA) { +        r300_emit_dsa_state(r300, r300->dsa_state); +    } + +    if (r300->dirty_state & R300_NEW_RASTERIZER) { +        r300_emit_rs_state(r300, r300->rs_state); +    } + +    if (r300->dirty_state & R300_NEW_SCISSOR) { +        struct r300_scissor_state* scissor = r300->scissor_state; +        /* XXX next two are contiguous regs */ +        OUT_CS_REG(R300_SC_SCISSORS_TL, scissor->scissor_top_left); +        OUT_CS_REG(R300_SC_SCISSORS_BR, scissor->scissor_bottom_right); +    } + +    r300->dirty_state = 0; +} diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h new file mode 100644 index 0000000000..b6e69386f9 --- /dev/null +++ b/src/gallium/drivers/r300/r300_emit.h @@ -0,0 +1,36 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to 
permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_screen.h" + +void r300_emit_blend_state(struct r300_context* r300, +                           struct r300_blend_state* blend); + +void r300_emit_blend_color_state(struct r300_context* r300, +                                 struct r300_blend_color_state* bc); + +void r300_emit_dsa_state(struct r300_context* r300, +                         struct r300_dsa_state* dsa); + +void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c new file mode 100644 index 0000000000..3766f0a0a7 --- /dev/null +++ b/src/gallium/drivers/r300/r300_flush.c @@ -0,0 +1,42 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "r300_flush.h" + +static void r300_flush(struct pipe_context* pipe, +                       unsigned flags, +                       struct pipe_fence_handle** fence) +{ +    struct r300_context* r300 = r300_context(pipe); +    CS_LOCALS(r300); + +    if (r300->dirty_hw) { +        FLUSH_CS; +        r300->dirty_state = R300_NEW_KITCHEN_SINK; +        r300->dirty_hw = 0; +    } +} + +void r300_init_flush_functions(struct r300_context* r300) +{ +    r300->context.flush = r300_flush; +} diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h new file mode 100644 index 0000000000..a1b224b39c --- /dev/null +++ b/src/gallium/drivers/r300/r300_flush.h @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_FLUSH_H +#define R300_FLUSH_H + +#include "pipe/p_context.h" + +#include "r300_context.h" +#include "r300_cs.h" + +void r300_init_flush_functions(struct r300_context* r300); + +#endif /* R300_FLUSH_H */ diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h new file mode 100644 index 0000000000..dbd0cc28e2 --- /dev/null +++ b/src/gallium/drivers/r300/r300_reg.h @@ -0,0 +1,3290 @@ +/************************************************************************** + +Copyright (C) 2004-2005 Nicolai Haehnle et al. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL +THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* *INDENT-OFF* */ + +#ifndef _R300_REG_H +#define _R300_REG_H + +#define R300_MC_INIT_MISC_LAT_TIMER	0x180 +#	define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT	0 +#	define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT	4 +#	define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT	8 +#	define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT	12 +#	define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT	16 +#	define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT	20 +#	define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT	24 +#	define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT	28 + + +#define R300_MC_INIT_GFX_LAT_TIMER	0x154 +#	define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT	0 +#	define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT	4 +#	define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT	8 +#	define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT	12 +#	define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT	16 +#	define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT	20 +#	define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT	24 +#	define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT	28 + +/* + * This file contains registers and constants for the R300. They have been + * found mostly by examining command buffers captured using glxtest, as well + * as by extrapolating some known registers and constants from the R200. + * I am fairly certain that they are correct unless stated otherwise + * in comments. + */ + +#define R300_SE_VPORT_XSCALE                0x1D98 +#define R300_SE_VPORT_XOFFSET               0x1D9C +#define R300_SE_VPORT_YSCALE                0x1DA0 +#define R300_SE_VPORT_YOFFSET               0x1DA4 +#define R300_SE_VPORT_ZSCALE                0x1DA8 +#define R300_SE_VPORT_ZOFFSET               0x1DAC + + +/* + * Vertex Array Processing (VAP) Control + */ +#define R300_VAP_CNTL	0x2080 +#       define R300_PVS_NUM_SLOTS_SHIFT                 0 +#       define R300_PVS_NUM_CNTLRS_SHIFT                4 +#       define R300_PVS_NUM_FPUS_SHIFT                  8 +#       define R300_VF_MAX_VTX_NUM_SHIFT                18 +#       define R300_GL_CLIP_SPACE_DEF                   (0 << 22) +#       define R300_DX_CLIP_SPACE_DEF                   (1 << 22) +#       define R500_TCL_STATE_OPTIMIZATION              (1 << 23) + +/* This register is written directly and also starts data section + * in many 3d CP_PACKET3's + */ +#define R300_VAP_VF_CNTL	0x2084 +#	define	R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT              0 +#	define  R300_VAP_VF_CNTL__PRIM_NONE                     (0<<0) +#	define  R300_VAP_VF_CNTL__PRIM_POINTS                   (1<<0) +#	define  R300_VAP_VF_CNTL__PRIM_LINES                    (2<<0) +#	define  R300_VAP_VF_CNTL__PRIM_LINE_STRIP               (3<<0) +#	define  R300_VAP_VF_CNTL__PRIM_TRIANGLES                (4<<0) +#	define  R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN             (5<<0) +#	define  R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP           (6<<0) +#	define  R300_VAP_VF_CNTL__PRIM_LINE_LOOP                (12<<0) +#	define  R300_VAP_VF_CNTL__PRIM_QUADS                    (13<<0) +#	define  R300_VAP_VF_CNTL__PRIM_QUAD_STRIP               (14<<0) +#	define  R300_VAP_VF_CNTL__PRIM_POLYGON                  (15<<0) + +#	define	R300_VAP_VF_CNTL__PRIM_WALK__SHIFT              4 +	/* State based - direct writes to registers trigger vertex +           generation */ +#	define	
R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED         (0<<4) +#	define	R300_VAP_VF_CNTL__PRIM_WALK_INDICES             (1<<4) +#	define	R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST         (2<<4) +#	define	R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED     (3<<4) + +	/* I don't think I saw these three used.. */ +#	define	R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT            6 +#	define	R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT     9 +#	define	R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT        10 + +	/* index size - when not set the indices are assumed to be 16 bit */ +#	define	R300_VAP_VF_CNTL__INDEX_SIZE_32bit              (1<<11) +	/* number of vertices */ +#	define	R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT           16 + +#define R500_VAP_INDEX_OFFSET		    0x208c + +#define R300_VAP_OUTPUT_VTX_FMT_0           0x2090 +#       define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT     (1<<0) +#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +#       define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) + +#define R300_VAP_OUTPUT_VTX_FMT_1           0x2094 +	/* each of the following is 3 bits wide, specifies number +	   of components */ +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 +#	define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT  0 +#	define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT  1 +#	define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +#	define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +#	define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 + +#define R300_SE_VTE_CNTL                  0x20b0 +#	define     R300_VPORT_X_SCALE_ENA                (1 << 0) +#	define     R300_VPORT_X_OFFSET_ENA               (1 << 1) +#	define     R300_VPORT_Y_SCALE_ENA                (1 << 2) +#	define     R300_VPORT_Y_OFFSET_ENA               (1 << 3) +#	define     R300_VPORT_Z_SCALE_ENA                (1 << 4) +#	define     R300_VPORT_Z_OFFSET_ENA               (1 << 5) +#	define     R300_VTX_XY_FMT                       (1 << 8) +#	define     R300_VTX_Z_FMT                        (1 << 9) +#	define     R300_VTX_W0_FMT                       (1 << 10) +#	define     R300_SERIAL_PROC_ENA                  (1 << 11) + +#define R300_VAP_VTX_SIZE               0x20b4 + +/* BEGIN: Vertex data assembly - lots of uncertainties */ + +/* gap */ + +/* Maximum Vertex Indx Clamp */ +#define R300_VAP_VF_MAX_VTX_INDX         0x2134 +/* Minimum Vertex Indx Clamp */ +#define R300_VAP_VF_MIN_VTX_INDX         0x2138 + +/** Vertex assembler/processor control status */ +#define R300_VAP_CNTL_STATUS              0x2140 +/* No swap at all (default) */ +#	define R300_VC_NO_SWAP                  (0 << 0) +/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */ +#	define R300_VC_16BIT_SWAP               (1 << 0) +/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */ +#	define R300_VC_32BIT_SWAP               (2 << 0) +/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */ +#	define 
R300_VC_HALF_DWORD_SWAP          (3 << 0) +/* The TCL engine will not be used (as it is logically or even physically removed) */ +#	define R300_VAP_TCL_BYPASS		(1 << 8) +/* Read only flag if TCL engine is busy. */ +#	define R300_VAP_PVS_BUSY                (1 << 11) +/* TODO: gap for MAX_MPS */ +/* Read only flag if the vertex store is busy. */ +#	define R300_VAP_VS_BUSY                 (1 << 24) +/* Read only flag if the reciprocal engine is busy. */ +#	define R300_VAP_RCP_BUSY                (1 << 25) +/* Read only flag if the viewport transform engine is busy. */ +#	define R300_VAP_VTE_BUSY                (1 << 26) +/* Read only flag if the memory interface unit is busy. */ +#	define R300_VAP_MUI_BUSY                (1 << 27) +/* Read only flag if the vertex cache is busy. */ +#	define R300_VAP_VC_BUSY                 (1 << 28) +/* Read only flag if the vertex fetcher is busy. */ +#	define R300_VAP_VF_BUSY                 (1 << 29) +/* Read only flag if the register pipeline is busy. */ +#	define R300_VAP_REGPIPE_BUSY            (1 << 30) +/* Read only flag if the VAP engine is busy. */ +#	define R300_VAP_VAP_BUSY                (1 << 31) + +/* gap */ + +/* Where do we get our vertex data? + * + * Vertex data either comes either from immediate mode registers or from + * vertex arrays. + * There appears to be no mixed mode (though we can force the pitch of + * vertex arrays to 0, effectively reusing the same element over and over + * again). + * + * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure + * if these registers influence vertex array processing. + * + * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. + * + * In both cases, vertex attributes are then passed through INPUT_ROUTE. + * + * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data + * into the vertex processor's input registers. + * The first word routes the first input, the second word the second, etc. + * The corresponding input is routed into the register with the given index. + * The list is ended by a word with INPUT_ROUTE_END set. + * + * Always set COMPONENTS_4 in immediate mode. 
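+ *
+ * Hypothetical sketch (ours, not taken from any driver): a single routing
+ * word for one 4-component float attribute sent to vector location 0 could
+ * be assembled from the PROG_STREAM_CNTL defines that follow, with
+ * R300_LAST_VEC marking the final routing word:
+ *
+ *   uint32_t route0 = (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
+ *                     (0 << R300_DST_VEC_LOC_SHIFT) |
+ *                     R300_LAST_VEC;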
+ */ + +#define R300_VAP_PROG_STREAM_CNTL_0                     0x2150 +#       define R300_DATA_TYPE_0_SHIFT                   0 +#       define R300_DATA_TYPE_FLOAT_1                   0 +#       define R300_DATA_TYPE_FLOAT_2                   1 +#       define R300_DATA_TYPE_FLOAT_3                   2 +#       define R300_DATA_TYPE_FLOAT_4                   3 +#       define R300_DATA_TYPE_BYTE                      4 +#       define R300_DATA_TYPE_D3DCOLOR                  5 +#       define R300_DATA_TYPE_SHORT_2                   6 +#       define R300_DATA_TYPE_SHORT_4                   7 +#       define R300_DATA_TYPE_VECTOR_3_TTT              8 +#       define R300_DATA_TYPE_VECTOR_3_EET              9 +#       define R300_SKIP_DWORDS_SHIFT                   4 +#       define R300_DST_VEC_LOC_SHIFT                   8 +#       define R300_LAST_VEC                            (1 << 13) +#       define R300_SIGNED                              (1 << 14) +#       define R300_NORMALIZE                           (1 << 15) +#       define R300_DATA_TYPE_1_SHIFT                   16 +#define R300_VAP_PROG_STREAM_CNTL_1                     0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2                     0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3                     0x215C +#define R300_VAP_PROG_STREAM_CNTL_4                     0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5                     0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6                     0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7                     0x216C +/* gap */ + +/* Notes: + *  - always set up to produce at least two attributes: + *    if vertex program uses only position, fglrx will set normal, too + *  - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal. + */ +#define R300_VAP_VTX_STATE_CNTL               0x2180 +#       define R300_COLOR_0_ASSEMBLY_SHIFT    0 +#       define R300_SEL_COLOR                 0 +#       define R300_SEL_USER_COLOR_0          1 +#       define R300_SEL_USER_COLOR_1          2 +#       define R300_COLOR_1_ASSEMBLY_SHIFT    2 +#       define R300_COLOR_2_ASSEMBLY_SHIFT    4 +#       define R300_COLOR_3_ASSEMBLY_SHIFT    6 +#       define R300_COLOR_4_ASSEMBLY_SHIFT    8 +#       define R300_COLOR_5_ASSEMBLY_SHIFT    10 +#       define R300_COLOR_6_ASSEMBLY_SHIFT    12 +#       define R300_COLOR_7_ASSEMBLY_SHIFT    14 +#       define R300_UPDATE_USER_COLOR_0_ENA   (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. 
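+ *
+ * Hedged example (not from the original code): a vertex stream carrying
+ * position, one color and one texture coordinate set would OR together the
+ * corresponding bits defined below:
+ *
+ *   uint32_t vtx_assm = R300_INPUT_CNTL_POS |
+ *                       R300_INPUT_CNTL_COLOR |
+ *                       R300_INPUT_CNTL_TC0;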
+ */ +#define R300_VAP_VSM_VTX_ASSM               0x2184 +#       define R300_INPUT_CNTL_POS               0x00000001 +#       define R300_INPUT_CNTL_NORMAL            0x00000002 +#       define R300_INPUT_CNTL_COLOR             0x00000004 +#       define R300_INPUT_CNTL_TC0               0x00000400 +#       define R300_INPUT_CNTL_TC1               0x00000800 +#       define R300_INPUT_CNTL_TC2               0x00001000 /* GUESS */ +#       define R300_INPUT_CNTL_TC3               0x00002000 /* GUESS */ +#       define R300_INPUT_CNTL_TC4               0x00004000 /* GUESS */ +#       define R300_INPUT_CNTL_TC5               0x00008000 /* GUESS */ +#       define R300_INPUT_CNTL_TC6               0x00010000 /* GUESS */ +#       define R300_INPUT_CNTL_TC7               0x00020000 /* GUESS */ + +/* Programmable Stream Control Signed Normalize Control */ +#define R300_VAP_PSC_SGN_NORM_CNTL         0x21dc +#	define SGN_NORM_ZERO                 0 +#	define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +#	define SGN_NORM_NO_ZERO              2 + +/* gap */ + +/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 + * are set to a swizzling bit pattern, other words are 0. + * + * In immediate mode, the pattern is always set to xyzw. In vertex array + * mode, the swizzling pattern is e.g. used to set zw components in texture + * coordinates with only tweo components. + */ +#define R300_VAP_PROG_STREAM_CNTL_EXT_0                 0x21e0 +#       define R300_SWIZZLE0_SHIFT                      0 +#       define R300_SWIZZLE_SELECT_X_SHIFT              0 +#       define R300_SWIZZLE_SELECT_Y_SHIFT              3 +#       define R300_SWIZZLE_SELECT_Z_SHIFT              6 +#       define R300_SWIZZLE_SELECT_W_SHIFT              9 + +#       define R300_SWIZZLE_SELECT_X                    0 +#       define R300_SWIZZLE_SELECT_Y                    1 +#       define R300_SWIZZLE_SELECT_Z                    2 +#       define R300_SWIZZLE_SELECT_W                    3 +#       define R300_SWIZZLE_SELECT_FP_ZERO              4 +#       define R300_SWIZZLE_SELECT_FP_ONE               5 +/* alternate forms for r300_emit.c */ +#       define R300_INPUT_ROUTE_SELECT_X    0 +#       define R300_INPUT_ROUTE_SELECT_Y    1 +#       define R300_INPUT_ROUTE_SELECT_Z    2 +#       define R300_INPUT_ROUTE_SELECT_W    3 +#       define R300_INPUT_ROUTE_SELECT_ZERO 4 +#       define R300_INPUT_ROUTE_SELECT_ONE  5 + +#       define R300_WRITE_ENA_SHIFT                     12 +#       define R300_WRITE_ENA_X                         1 +#       define R300_WRITE_ENA_Y                         2 +#       define R300_WRITE_ENA_Z                         4 +#       define R300_WRITE_ENA_W                         8 +#       define R300_SWIZZLE1_SHIFT                      16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1                 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2                 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3                 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4                 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5                 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6                 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7                 0x21fc + +/* END: Vertex data assembly */ + +/* gap */ + +/* BEGIN: Upload vertex program and data */ + +/* + * The programmable vertex shader unit has a memory bank of unknown size + * that can be written to in 16 byte units by writing the address into + * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 
DWORDs). + * + * Pointers into the memory bank are always in multiples of 16 bytes. + * + * The memory bank is divided into areas with fixed meaning. + * + * Starting at address UPLOAD_PROGRAM: Vertex program instructions. + * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), + * whereas the difference between known addresses suggests size 512. + * + * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. + * Native reported limits and the VPI layout suggest size 256, whereas + * difference between known addresses suggests size 512. + * + * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the + * floating point pointsize. The exact purpose of this state is uncertain, + * as there is also the R300_RE_POINTSIZE register. + * + * Multiple vertex programs and parameter sets can be loaded at once, + * which could explain the size discrepancy. + */ +#define R300_VAP_PVS_VECTOR_INDX_REG         0x2200 +#       define R300_PVS_CODE_START           0 +#       define R300_MAX_PVS_CODE_LINES       256 +#       define R500_MAX_PVS_CODE_LINES       1024 +#       define R300_PVS_CONST_START          512 +#       define R500_PVS_CONST_START          1024 +#       define R300_MAX_PVS_CONST_VECS       256 +#       define R500_MAX_PVS_CONST_VECS       1024 +#       define R300_PVS_UCP_START            1024 +#       define R500_PVS_UCP_START            1536 +#       define R300_POINT_VPORT_SCALE_OFFSET 1030 +#       define R500_POINT_VPORT_SCALE_OFFSET 1542 +#       define R300_POINT_GEN_TEX_OFFSET     1031 +#       define R500_POINT_GEN_TEX_OFFSET     1543 + +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM        0x0 +#define VSF_DEST_MATRIX0        0x200 +#define VSF_DEST_MATRIX1        0x204 +#define VSF_DEST_MATRIX2        0x208 +#define VSF_DEST_VECTOR0        0x20c +#define VSF_DEST_VECTOR1        0x20d +#define VSF_DEST_UNKNOWN1       0x400 +#define VSF_DEST_UNKNOWN2       0x406 +#endif + +/* gap */ + +#define R300_VAP_PVS_UPLOAD_DATA            0x2208 + +/* END: Upload vertex program and data */ + +/* gap */ + +/* I do not know the purpose of this register. However, I do know that + * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL + * for normal rendering. + * + * 2007-11-05: This register is the user clip plane control register, but there + * also seems to be a rendering mode control; the NORMAL/CLEAR defines. + * + * See bug #9871. 
http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view + */ +#define R300_VAP_CLIP_CNTL                       0x221C +#       define R300_VAP_UCP_ENABLE_0             (1 << 0) +#       define R300_VAP_UCP_ENABLE_1             (1 << 1) +#       define R300_VAP_UCP_ENABLE_2             (1 << 2) +#       define R300_VAP_UCP_ENABLE_3             (1 << 3) +#       define R300_VAP_UCP_ENABLE_4             (1 << 4) +#       define R300_VAP_UCP_ENABLE_5             (1 << 5) +#       define R300_PS_UCP_MODE_DIST_COP         (0 << 14) +#       define R300_PS_UCP_MODE_RADIUS_COP       (1 << 14) +#       define R300_PS_UCP_MODE_RADIUS_COP_CLIP  (2 << 14) +#       define R300_PS_UCP_MODE_CLIP_AS_TRIFAN   (3 << 14) +#       define R300_CLIP_DISABLE                 (1 << 16) +#       define R300_UCP_CULL_ONLY_ENABLE         (1 << 17) +#       define R300_BOUNDARY_EDGE_FLAG_ENABLE    (1 << 18) +#       define R500_COLOR2_IS_TEXTURE            (1 << 20) +#       define R500_COLOR3_IS_TEXTURE            (1 << 21) + +/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first + * plane is per-pixel and the second plane is per-vertex. + * + * This was determined by experimentation alone but I believe it is correct. + * + * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. + */ +#define R300_VAP_GB_VERT_CLIP_ADJ                   0x2220 +#define R300_VAP_GB_VERT_DISC_ADJ                   0x2224 +#define R300_VAP_GB_HORZ_CLIP_ADJ                   0x2228 +#define R300_VAP_GB_HORZ_DISC_ADJ                   0x222c + +/* gap */ + +/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between + * rendering commands and overwriting vertex program parameters. + * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and + * avoids bugs caused by still running shaders reading bad data from memory. + */ +#define R300_VAP_PVS_STATE_FLUSH_REG        0x2284 + +/* This register is used to define the number of core clocks to wait for a + * vertex to be received by the VAP input controller (while the primitive + * path is backed up) before forcing any accumulated vertices to be submitted + * to the vertex processing path. + */ +#define VAP_PVS_VTX_TIMEOUT_REG             0x2288 +#       define R300_2288_R300                    0x00750000 /* -- nh */ +#       define R300_2288_RV350                   0x0000FFFF /* -- Vladimir */ + +/* gap */ + +/* Addresses are relative to the vertex program instruction area of the + * memory bank. PROGRAM_END points to the last instruction of the active + * program + * + * The meaning of the two UNKNOWN fields is obviously not known. However, + * experiments so far have shown that both *must* point to an instruction + * inside the vertex program, otherwise the GPU locks up. + * + * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * + * Most likely this is used to ignore rest of the program in cases + * where group of verts arent visible. For some reason this "section" + * is sometimes accepted other instruction that have no relationship with + * position calculations. + */ +#define R300_VAP_PVS_CODE_CNTL_0            0x22D0 +#       define R300_PVS_FIRST_INST_SHIFT         0 +#       define R300_PVS_XYZW_VALID_INST_SHIFT    10 +#       define R300_PVS_LAST_INST_SHIFT          20 +/* Addresses are relative the the vertex program parameters area. 
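+ *
+ * Illustrative sketch only: for a program using nconsts constant vectors
+ * starting at parameter slot 0 (nconsts is a placeholder, and treating
+ * MAX_CONST_ADDR as the last addressable slot is our assumption):
+ *
+ *   uint32_t const_cntl =
+ *       (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+ *       ((nconsts - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);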
*/ +#define R300_VAP_PVS_CONST_CNTL             0x22D4 +#       define R300_PVS_CONST_BASE_OFFSET_SHIFT  0 +#       define R300_PVS_MAX_CONST_ADDR_SHIFT     16 +#define R300_VAP_PVS_CODE_CNTL_1	    0x22D8 +#       define R300_PVS_LAST_VTX_SRC_INST_SHIFT  0 +#define R300_VAP_PVS_FLOW_CNTL_OPC          0x22DC + +/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for + * immediate vertices + */ +#define R300_VAP_VTX_COLOR_R                0x2464 +#define R300_VAP_VTX_COLOR_G                0x2468 +#define R300_VAP_VTX_COLOR_B                0x246C +#define R300_VAP_VTX_POS_0_X_1              0x2490 /* used for glVertex2*() */ +#define R300_VAP_VTX_POS_0_Y_1              0x2494 +#define R300_VAP_VTX_COLOR_PKD              0x249C /* RGBA */ +#define R300_VAP_VTX_POS_0_X_2              0x24A0 /* used for glVertex3*() */ +#define R300_VAP_VTX_POS_0_Y_2              0x24A4 +#define R300_VAP_VTX_POS_0_Z_2              0x24A8 +/* write 0 to indicate end of packet? */ +#define R300_VAP_VTX_END_OF_PKT             0x24AC + +/* gap */ + +/* These are values from r300_reg/r300_reg.h - they are known to be correct + * and are here so we can use one register file instead of several + * - Vladimir + */ +#define R300_GB_VAP_RASTER_VTX_FMT_0	0x4000 +#	define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT	(1<<0) +#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT	(1<<1) +#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT	(1<<2) +#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT	(1<<3) +#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT	(1<<4) +#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE	(0xf<<5) +#	define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT	(0x1<<16) + +#define R300_GB_VAP_RASTER_VTX_FMT_1	0x4004 +	/* each of the following is 3 bits wide, specifies number +	   of components */ +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT	0 +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT	3 +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT	6 +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT	9 +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT	12 +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT	15 +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT	18 +#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT	21 + +/* UNK30 seems to enables point to quad transformation on textures + * (or something closely related to that). + * This bit is rather fatal at the time being due to lackings at pixel + * shader side + * Specifies top of Raster pipe specific enable controls. + */ +#define R300_GB_ENABLE	0x4008 +#	define R300_GB_POINT_STUFF_DISABLE     (0 << 0) +#	define R300_GB_POINT_STUFF_ENABLE      (1 << 0) /* Specifies if points will have stuffed texture coordinates. */ +#	define R300_GB_LINE_STUFF_DISABLE      (0 << 1) +#	define R300_GB_LINE_STUFF_ENABLE       (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */ +#	define R300_GB_TRIANGLE_STUFF_DISABLE  (0 << 2) +#	define R300_GB_TRIANGLE_STUFF_ENABLE   (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */ +#	define R300_GB_STENCIL_AUTO_DISABLE    (0 << 4) +#	define R300_GB_STENCIL_AUTO_ENABLE     (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */ +#	define R300_GB_STENCIL_AUTO_FORCE      (2 << 4) /* Force 0 into dzy low bit. */ + +	/* each of the following is 2 bits wide */ +#define R300_GB_TEX_REPLICATE	0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). 
*/ +#define R300_GB_TEX_ST		1 /* Stuff with source texture coordinates (S,T). */ +#define R300_GB_TEX_STR		2 /* Stuff with source texture coordinates (S,T,R). */ +#	define R300_GB_TEX0_SOURCE_SHIFT	16 +#	define R300_GB_TEX1_SOURCE_SHIFT	18 +#	define R300_GB_TEX2_SOURCE_SHIFT	20 +#	define R300_GB_TEX3_SOURCE_SHIFT	22 +#	define R300_GB_TEX4_SOURCE_SHIFT	24 +#	define R300_GB_TEX5_SOURCE_SHIFT	26 +#	define R300_GB_TEX6_SOURCE_SHIFT	28 +#	define R300_GB_TEX7_SOURCE_SHIFT	30 + +/* MSPOS - positions for multisample antialiasing (?) */ +#define R300_GB_MSPOS0                           0x4010 +	/* shifts - each of the fields is 4 bits */ +#	define R300_GB_MSPOS0__MS_X0_SHIFT	0 +#	define R300_GB_MSPOS0__MS_Y0_SHIFT	4 +#	define R300_GB_MSPOS0__MS_X1_SHIFT	8 +#	define R300_GB_MSPOS0__MS_Y1_SHIFT	12 +#	define R300_GB_MSPOS0__MS_X2_SHIFT	16 +#	define R300_GB_MSPOS0__MS_Y2_SHIFT	20 +#	define R300_GB_MSPOS0__MSBD0_Y		24 +#	define R300_GB_MSPOS0__MSBD0_X		28 + +#define R300_GB_MSPOS1                           0x4014 +#	define R300_GB_MSPOS1__MS_X3_SHIFT	0 +#	define R300_GB_MSPOS1__MS_Y3_SHIFT	4 +#	define R300_GB_MSPOS1__MS_X4_SHIFT	8 +#	define R300_GB_MSPOS1__MS_Y4_SHIFT	12 +#	define R300_GB_MSPOS1__MS_X5_SHIFT	16 +#	define R300_GB_MSPOS1__MS_Y5_SHIFT	20 +#	define R300_GB_MSPOS1__MSBD1		24 + +/* Specifies the graphics pipeline configuration for rasterization. */ +#define R300_GB_TILE_CONFIG                      0x4018 +#	define R300_GB_TILE_DISABLE             (0 << 0) +#	define R300_GB_TILE_ENABLE              (1 << 0) +#	define R300_GB_TILE_PIPE_COUNT_RV300	(0 << 1) /* RV350 (1 pipe, 1 ctx) */ +#	define R300_GB_TILE_PIPE_COUNT_R300	(3 << 1) /* R300 (2 pipes, 1 ctx) */ +#	define R300_GB_TILE_PIPE_COUNT_R420_3P  (6 << 1) /* R420-3P (3 pipes, 1 ctx) */ +#	define R300_GB_TILE_PIPE_COUNT_R420	(7 << 1) /* R420 (4 pipes, 1 ctx) */ +#	define R300_GB_TILE_SIZE_8		(0 << 4) +#	define R300_GB_TILE_SIZE_16		(1 << 4) +#	define R300_GB_TILE_SIZE_32		(2 << 4) +#	define R300_GB_SUPER_SIZE_1		(0 << 6) +#	define R300_GB_SUPER_SIZE_2		(1 << 6) +#	define R300_GB_SUPER_SIZE_4		(2 << 6) +#	define R300_GB_SUPER_SIZE_8		(3 << 6) +#	define R300_GB_SUPER_SIZE_16		(4 << 6) +#	define R300_GB_SUPER_SIZE_32		(5 << 6) +#	define R300_GB_SUPER_SIZE_64		(6 << 6) +#	define R300_GB_SUPER_SIZE_128		(7 << 6) +#	define R300_GB_SUPER_X_SHIFT		9	/* 3 bits wide */ +#	define R300_GB_SUPER_Y_SHIFT		12	/* 3 bits wide */ +#	define R300_GB_SUPER_TILE_A		(0 << 15) +#	define R300_GB_SUPER_TILE_B		(1 << 15) +#	define R300_GB_SUBPIXEL_1_12		(0 << 16) +#	define R300_GB_SUBPIXEL_1_16		(1 << 16) +#	define GB_TILE_CONFIG_QUADS_PER_RAS_4   (0 << 17) +#	define GB_TILE_CONFIG_QUADS_PER_RAS_8   (1 << 17) +#	define GB_TILE_CONFIG_QUADS_PER_RAS_16  (2 << 17) +#	define GB_TILE_CONFIG_QUADS_PER_RAS_32  (3 << 17) +#	define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +#	define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +#	define GB_TILE_CONFIG_ALT_SCAN_EN_LR    (0 << 20) +#	define GB_TILE_CONFIG_ALT_SCAN_EN_LRL   (1 << 20) +#	define GB_TILE_CONFIG_ALT_OFFSET        (0 << 21) +#	define GB_TILE_CONFIG_SUBPRECISION      (0 << 22) +#	define GB_TILE_CONFIG_ALT_TILING_DEF    (0 << 23) +#	define GB_TILE_CONFIG_ALT_TILING_3_2    (1 << 23) +#	define GB_TILE_CONFIG_Z_EXTENDED_24_1   (0 << 24) +#	define GB_TILE_CONFIG_Z_EXTENDED_S25_1  (1 << 24) + +/* Specifies the sizes of the various FIFO`s in the sc/rs/us. 
This register must be the first one written */ +#define R300_GB_FIFO_SIZE	0x4024 +	/* each of the following is 2 bits wide */ +#define R300_GB_FIFO_SIZE_32	0 +#define R300_GB_FIFO_SIZE_64	1 +#define R300_GB_FIFO_SIZE_128	2 +#define R300_GB_FIFO_SIZE_256	3 +#	define R300_SC_IFIFO_SIZE_SHIFT	0 +#	define R300_SC_TZFIFO_SIZE_SHIFT	2 +#	define R300_SC_BFIFO_SIZE_SHIFT	4 + +#	define R300_US_OFIFO_SIZE_SHIFT	12 +#	define R300_US_WFIFO_SIZE_SHIFT	14 +	/* the following use the same constants as above, but meaning is +	   is times 2 (i.e. instead of 32 words it means 64 */ +#	define R300_RS_TFIFO_SIZE_SHIFT	6 +#	define R300_RS_CFIFO_SIZE_SHIFT	8 +#	define R300_US_RAM_SIZE_SHIFT		10 +	/* watermarks, 3 bits wide */ +#	define R300_RS_HIGHWATER_COL_SHIFT	16 +#	define R300_RS_HIGHWATER_TEX_SHIFT	19 +#	define R300_OFIFO_HIGHWATER_SHIFT	22	/* two bits only */ +#	define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT	24 + +#define GB_Z_PEQ_CONFIG                          0x4028 +#	define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4    (0 << 0) +#	define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8    (1 << 0) + +/* Specifies various polygon specific selects (fog, depth, perspective). */ +#define R300_GB_SELECT                           0x401c +#	define R300_GB_FOG_SELECT_C0A		(0 << 0) +#	define R300_GB_FOG_SELECT_C1A           (1 << 0) +#	define R300_GB_FOG_SELECT_C2A           (2 << 0) +#	define R300_GB_FOG_SELECT_C3A           (3 << 0) +#	define R300_GB_FOG_SELECT_1_1_W         (4 << 0) +#	define R300_GB_FOG_SELECT_Z		(5 << 0) +#	define R300_GB_DEPTH_SELECT_Z		(0 << 3) +#	define R300_GB_DEPTH_SELECT_1_1_W	(1 << 3) +#	define R300_GB_W_SELECT_1_W		(0 << 4) +#	define R300_GB_W_SELECT_1		(1 << 4) +#	define R300_GB_FOG_STUFF_DISABLE        (0 << 5) +#	define R300_GB_FOG_STUFF_ENABLE         (1 << 5) +#	define R300_GB_FOG_STUFF_TEX_SHIFT      6 +#	define R300_GB_FOG_STUFF_TEX_MASK       0x000003c0 +#	define R300_GB_FOG_STUFF_COMP_SHIFT     10 +#	define R300_GB_FOG_STUFF_COMP_MASK      0x00000c00 + +/* Specifies the graphics pipeline configuration for antialiasing. */ +#define R300_GB_AA_CONFIG                         0x4020 +#	define GB_AA_CONFIG_AA_DISABLE           (0 << 0) +#	define GB_AA_CONFIG_AA_ENABLE            (1 << 0) +#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2  (0 << 1) +#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3  (1 << 1) +#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4  (2 << 1) +#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6  (3 << 1) + +/* Selects which of 4 pipes are active. */ +#define GB_PIPE_SELECT                           0x402c +#	define GB_PIPE_SELECT_PIPE0_ID_SHIFT  0 +#	define GB_PIPE_SELECT_PIPE1_ID_SHIFT  2 +#	define GB_PIPE_SELECT_PIPE2_ID_SHIFT  4 +#	define GB_PIPE_SELECT_PIPE3_ID_SHIFT  6 +#	define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +#	define GB_PIPE_SELECT_MAX_PIPE        12 +#	define GB_PIPE_SELECT_BAD_PIPES       14 +#	define GB_PIPE_SELECT_CONFIG_PIPES    18 + + +/* Specifies the sizes of the various FIFO`s in the sc/rs. 
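+ *
+ * Hypothetical packing sketch (the field values are placeholders, not
+ * known-good settings), using the shift/mask pairs defined below:
+ *
+ *   uint32_t fifo_size1 =
+ *       ((ififo_hw << GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT) & GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK) |
+ *       ((bfifo_hw << GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT) & GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK) |
+ *       ((col_hw   << GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT)   & GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK)   |
+ *       ((tex_hw   << GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT)   & GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK);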
*/ +#define GB_FIFO_SIZE1                            0x4070 +/* High water mark for SC input fifo */ +#	define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +#	define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK  0x0000003f +/* High water mark for SC input fifo (B) */ +#	define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +#	define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK  0x00000fc0 +/* High water mark for RS colors' fifo */ +#	define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT   12 +#	define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK    0x0003f000 +/* High water mark for RS textures' fifo */ +#	define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT   18 +#	define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK    0x00fc0000 + +/* This table specifies the source location and format for up to 16 texture + * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) + */ +#define R500_RS_IP_0					0x4074 +#define R500_RS_IP_1					0x4078 +#define R500_RS_IP_2					0x407C +#define R500_RS_IP_3					0x4080 +#define R500_RS_IP_4					0x4084 +#define R500_RS_IP_5					0x4088 +#define R500_RS_IP_6					0x408C +#define R500_RS_IP_7					0x4090 +#define R500_RS_IP_8					0x4094 +#define R500_RS_IP_9					0x4098 +#define R500_RS_IP_10					0x409C +#define R500_RS_IP_11					0x40A0 +#define R500_RS_IP_12					0x40A4 +#define R500_RS_IP_13					0x40A8 +#define R500_RS_IP_14					0x40AC +#define R500_RS_IP_15					0x40B0 +#define R500_RS_IP_PTR_K0                               62 +#define R500_RS_IP_PTR_K1                               63 +#define R500_RS_IP_TEX_PTR_S_SHIFT 			0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 			6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 			12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 			18 +#define R500_RS_IP_COL_PTR_SHIFT 			24 +#define R500_RS_IP_COL_FMT_SHIFT 			27 +#	define R500_RS_COL_PTR(x)		        (x << 24) +#       define R500_RS_COL_FMT(x)                       (x << 27) +/* gap */ +#define R500_RS_IP_OFFSET_DIS 				(0 << 31) +#define R500_RS_IP_OFFSET_EN 				(1 << 31) + +/* gap */ + +/* Zero to flush caches. */ +#define R300_TX_INVALTAGS                   0x4100 +#define R300_TX_FLUSH                       0x0 + +/* The upper enable bits are guessed, based on fglrx reported limits. 
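+ *
+ * Minimal example (ours, not from the original code): enabling the first
+ * two texture units would simply OR their bits:
+ *
+ *   uint32_t tx_enable = R300_TX_ENABLE_0 | R300_TX_ENABLE_1;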
*/ +#define R300_TX_ENABLE                      0x4104 +#       define R300_TX_ENABLE_0                  (1 << 0) +#       define R300_TX_ENABLE_1                  (1 << 1) +#       define R300_TX_ENABLE_2                  (1 << 2) +#       define R300_TX_ENABLE_3                  (1 << 3) +#       define R300_TX_ENABLE_4                  (1 << 4) +#       define R300_TX_ENABLE_5                  (1 << 5) +#       define R300_TX_ENABLE_6                  (1 << 6) +#       define R300_TX_ENABLE_7                  (1 << 7) +#       define R300_TX_ENABLE_8                  (1 << 8) +#       define R300_TX_ENABLE_9                  (1 << 9) +#       define R300_TX_ENABLE_10                 (1 << 10) +#       define R300_TX_ENABLE_11                 (1 << 11) +#       define R300_TX_ENABLE_12                 (1 << 12) +#       define R300_TX_ENABLE_13                 (1 << 13) +#       define R300_TX_ENABLE_14                 (1 << 14) +#       define R300_TX_ENABLE_15                 (1 << 15) + +#define R500_TX_FILTER_4		    0x4110 +#	define R500_TX_WEIGHT_1_SHIFT            (0) +#	define R500_TX_WEIGHT_0_SHIFT            (11) +#	define R500_TX_WEIGHT_PAIR               (1<<22) +#	define R500_TX_PHASE_SHIFT               (23) +#	define R500_TX_DIRECTION_HORIZONTAL	 (0<<27) +#	define R500_TX_DIRECTION_VERITCAL	 (1<<27) + +/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_S0                              0x4200 + +/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_T0                              0x4204 + +/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_S1                              0x4208 + +/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_T1                              0x420c + +/* Specifies amount to shift integer position of vertex (screen space) before + * converting to float for triangle stipple. + */ +#define R300_GA_TRIANGLE_STIPPLE            0x4214 +#	define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 +#	define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK  0x0000000f +#	define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 +#	define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK  0x000f0000 + +/* The pointsize is given in multiples of 6. The pointsize can be enormous: + * Clear() renders a single point that fills the entire framebuffer. + * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in + * 8b precision). + */ +#define R300_GA_POINT_SIZE                   0x421C +#       define R300_POINTSIZE_Y_SHIFT         0 +#       define R300_POINTSIZE_Y_MASK          0x0000ffff +#       define R300_POINTSIZE_X_SHIFT         16 +#       define R300_POINTSIZE_X_MASK          0xffff0000 +#       define R300_POINTSIZE_MAX             (R300_POINTSIZE_Y_MASK / 6) + +/* Blue fill color */ +#define R500_GA_FILL_R                                0x4220 + +/* Blue fill color */ +#define R500_GA_FILL_G                                0x4224 + +/* Blue fill color */ +#define R500_GA_FILL_B                                0x4228 + +/* Alpha fill color */ +#define R500_GA_FILL_A                                0x422c + + +/* Specifies maximum and minimum point & sprite sizes for per vertex size + * specification. The lower part (15:0) is MIN and (31:16) is max. 
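+ *
+ * Hedged sketch: assuming min_size and max_size are already expressed in
+ * the hardware's fixed-point point-size units (see R300_GA_POINT_SIZE
+ * above), the register value could be packed as:
+ *
+ *   uint32_t minmax =
+ *       ((min_size << R300_GA_POINT_MINMAX_MIN_SHIFT) & R300_GA_POINT_MINMAX_MIN_MASK) |
+ *       ((max_size << R300_GA_POINT_MINMAX_MAX_SHIFT) & R300_GA_POINT_MINMAX_MAX_MASK);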
+ */ +#define R300_GA_POINT_MINMAX                0x4230 +#       define R300_GA_POINT_MINMAX_MIN_SHIFT          0 +#       define R300_GA_POINT_MINMAX_MIN_MASK           (0xFFFF << 0) +#       define R300_GA_POINT_MINMAX_MAX_SHIFT          16 +#       define R300_GA_POINT_MINMAX_MAX_MASK           (0xFFFF << 16) + +/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b + * subprecision); (16.0) fixed format. + * + * The line width is given in multiples of 6. + * In default mode lines are classified as vertical lines. + * HO: horizontal + * VE: vertical or horizontal + * HO & VE: no classification + */ +#define R300_GA_LINE_CNTL                             0x4234 +#       define R300_GA_LINE_CNTL_WIDTH_SHIFT       0 +#       define R300_GA_LINE_CNTL_WIDTH_MASK        0x0000ffff +#	define R300_GA_LINE_CNTL_END_TYPE_HOR      (0 << 16) +#	define R300_GA_LINE_CNTL_END_TYPE_VER      (1 << 16) +#	define R300_GA_LINE_CNTL_END_TYPE_SQR      (2 << 16) /* horizontal or vertical depending upon slope */ +#	define R300_GA_LINE_CNTL_END_TYPE_COMP     (3 << 16) /* Computed (perpendicular to slope) */ +#	define R500_GA_LINE_CNTL_SORT_NO           (0 << 18) +#	define R500_GA_LINE_CNTL_SORT_MINX_MINY    (1 << 18) +/** TODO: looks wrong */ +#       define R300_LINESIZE_MAX              (R300_GA_LINE_CNTL_WIDTH_MASK / 6) +/** TODO: looks wrong */ +#       define R300_LINE_CNT_HO               (1 << 16) +/** TODO: looks wrong */ +#       define R300_LINE_CNT_VE               (1 << 17) + +/* Line Stipple configuration information. */ +#define R300_GA_LINE_STIPPLE_CONFIG                   0x4238 +#	define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO     (0 << 0) +#	define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE   (1 << 0) +#	define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) +#	define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 +#	define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK  0xfffffffc + +/* Used to load US instructions and constants */ +#define R500_GA_US_VECTOR_INDEX               0x4250 +#	define R500_GA_US_VECTOR_INDEX_SHIFT       0 +#	define R500_GA_US_VECTOR_INDEX_MASK        0x000000ff +#	define R500_GA_US_VECTOR_INDEX_TYPE_INSTR  (0 << 16) +#	define R500_GA_US_VECTOR_INDEX_TYPE_CONST  (1 << 16) +#	define R500_GA_US_VECTOR_INDEX_CLAMP_NO    (0 << 17) +#	define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) + +/* Data register for loading US instructions and constants */ +#define R500_GA_US_VECTOR_DATA                0x4254 + +/* Specifies color properties and mappings of textures. 
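+ *
+ * Illustrative only: selecting Gouraud shading for the first two texture
+ * coordinate sets would OR the per-texture fields defined below:
+ *
+ *   uint32_t ps3 = R500_TEX0_SHADING_PS3_GOURAUD |
+ *                  R500_TEX1_SHADING_PS3_GOURAUD;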
*/ +#define R500_GA_COLOR_CONTROL_PS3                     0x4258 +#	define R500_TEX0_SHADING_PS3_SOLID       (0 << 0) +#	define R500_TEX0_SHADING_PS3_FLAT        (1 << 0) +#	define R500_TEX0_SHADING_PS3_GOURAUD     (2 << 0) +#	define R500_TEX1_SHADING_PS3_SOLID       (0 << 2) +#	define R500_TEX1_SHADING_PS3_FLAT        (1 << 2) +#	define R500_TEX1_SHADING_PS3_GOURAUD     (2 << 2) +#	define R500_TEX2_SHADING_PS3_SOLID       (0 << 4) +#	define R500_TEX2_SHADING_PS3_FLAT        (1 << 4) +#	define R500_TEX2_SHADING_PS3_GOURAUD     (2 << 4) +#	define R500_TEX3_SHADING_PS3_SOLID       (0 << 6) +#	define R500_TEX3_SHADING_PS3_FLAT        (1 << 6) +#	define R500_TEX3_SHADING_PS3_GOURAUD     (2 << 6) +#	define R500_TEX4_SHADING_PS3_SOLID       (0 << 8) +#	define R500_TEX4_SHADING_PS3_FLAT        (1 << 8) +#	define R500_TEX4_SHADING_PS3_GOURAUD     (2 << 8) +#	define R500_TEX5_SHADING_PS3_SOLID       (0 << 10) +#	define R500_TEX5_SHADING_PS3_FLAT        (1 << 10) +#	define R500_TEX5_SHADING_PS3_GOURAUD     (2 << 10) +#	define R500_TEX6_SHADING_PS3_SOLID       (0 << 12) +#	define R500_TEX6_SHADING_PS3_FLAT        (1 << 12) +#	define R500_TEX6_SHADING_PS3_GOURAUD     (2 << 12) +#	define R500_TEX7_SHADING_PS3_SOLID       (0 << 14) +#	define R500_TEX7_SHADING_PS3_FLAT        (1 << 14) +#	define R500_TEX7_SHADING_PS3_GOURAUD     (2 << 14) +#	define R500_TEX8_SHADING_PS3_SOLID       (0 << 16) +#	define R500_TEX8_SHADING_PS3_FLAT        (1 << 16) +#	define R500_TEX8_SHADING_PS3_GOURAUD     (2 << 16) +#	define R500_TEX9_SHADING_PS3_SOLID       (0 << 18) +#	define R500_TEX9_SHADING_PS3_FLAT        (1 << 18) +#	define R500_TEX9_SHADING_PS3_GOURAUD     (2 << 18) +#	define R500_TEX10_SHADING_PS3_SOLID      (0 << 20) +#	define R500_TEX10_SHADING_PS3_FLAT       (1 << 20) +#	define R500_TEX10_SHADING_PS3_GOURAUD    (2 << 20) +#	define R500_COLOR0_TEX_OVERRIDE_NO       (0 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_0    (1 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_1    (2 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_2    (3 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_3    (4 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_4    (5 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_5    (6 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_6    (7 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_7    (8 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) +#	define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) +#	define R500_COLOR1_TEX_OVERRIDE_NO       (0 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_0    (1 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_1    (2 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_2    (3 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_3    (4 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_4    (5 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_5    (6 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_6    (7 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_7    (8 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) +#	define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) + +/* Returns idle status of various G3D block, captured when GA_IDLE written or + * when hard or soft reset asserted. 
+ */ +#define R500_GA_IDLE                                  0x425c +#	define R500_GA_IDLE_PIPE3_Z_IDLE  (0 << 0) +#	define R500_GA_IDLE_PIPE2_Z_IDLE  (0 << 1) +#	define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2) +#	define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3) +#	define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4) +#	define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5) +#	define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6) +#	define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7) +#	define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8) +#	define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9) +#	define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10) +#	define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11) +#	define R500_GA_IDLE_PIPE1_Z_IDLE  (0 << 12) +#	define R500_GA_IDLE_PIPE0_Z_IDLE  (0 << 13) +#	define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14) +#	define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15) +#	define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16) +#	define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17) +#	define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18) +#	define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19) +#	define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20) +#	define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21) +#	define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22) +#	define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23) +#	define R500_GA_IDLE_SU_IDLE       (0 << 24) +#	define R500_GA_IDLE_GA_IDLE       (0 << 25) +#	define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26) + +/* Current value of stipple accumulator. */ +#define R300_GA_LINE_STIPPLE_VALUE            0x4260 + +/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S0                               0x4264 +/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S1                               0x4268 + +/* GA Input fifo high water marks */ +#define R500_GA_FIFO_CNTL                             0x4270 +#	define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK   0x00000007 +#	define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT  0 +#	define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK  0x00000038 +#	define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 +#	define R500_GA_FIFO_CNTL_VERTEX_REG_MASK    0x00003fc0 +#	define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT   6 + +/* GA enhance/tweaks */ +#define R300_GA_ENHANCE                               0x4274 +#	define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT   (0 << 0) +#	define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ +#	define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT   (0 << 1) +#	define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE      (1 << 1) /* Enables high-performance register/primitive switching. */ +#	define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT   (0 << 2) /* R520+ only */ +#	define R500_GA_ENHANCE_REG_READWRITE_ENABLE      (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ +#	define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT     (0 << 3) +#	define R500_GA_ENHANCE_REG_NOSTALL_ENABLE        (1 << 3) /* Enables GA support of no-stall reads for register read back. 
*/ + +#define R300_GA_COLOR_CONTROL                   0x4278 +#	define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID      (0 << 0) +#	define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT       (1 << 0) +#	define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD    (2 << 0) +#	define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID    (0 << 2) +#	define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT     (1 << 2) +#	define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD  (2 << 2) +#	define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID      (0 << 4) +#	define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT       (1 << 4) +#	define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD    (2 << 4) +#	define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID    (0 << 6) +#	define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT     (1 << 6) +#	define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD  (2 << 6) +#	define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID      (0 << 8) +#	define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT       (1 << 8) +#	define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD    (2 << 8) +#	define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID    (0 << 10) +#	define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT     (1 << 10) +#	define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD  (2 << 10) +#	define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID      (0 << 12) +#	define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT       (1 << 12) +#	define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD    (2 << 12) +#	define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID    (0 << 14) +#	define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT     (1 << 14) +#	define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD  (2 << 14) +#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST  (0 << 16) +#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16) +#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD  (2 << 16) +#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST   (3 << 16) + +/** TODO: might be candidate for removal */ +#	define R300_RE_SHADE_MODEL_SMOOTH     ( \ +	R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ +	R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ +	R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ +	R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ +	R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) +/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ +#	define R300_RE_SHADE_MODEL_FLAT     ( \ +	R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ +	R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ +	R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ +	R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ +	R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + +/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_RG                         0x427c +#	define GA_SOLID_RG_COLOR_GREEN_SHIFT 0 +#	define GA_SOLID_RG_COLOR_GREEN_MASK  0x0000ffff +#	define GA_SOLID_RG_COLOR_RED_SHIFT   16 +#	define GA_SOLID_RG_COLOR_RED_MASK    0xffff0000 +/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. 
*/ +#define R300_GA_SOLID_BA                         0x4280 +#	define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0 +#	define GA_SOLID_BA_COLOR_ALPHA_MASK  0x0000ffff +#	define GA_SOLID_BA_COLOR_BLUE_SHIFT  16 +#	define GA_SOLID_BA_COLOR_BLUE_MASK   0xffff0000 + +/* Polygon Mode + * Dangerous + */ +#define R300_GA_POLY_MODE                             0x4288 +#	define R300_GA_POLY_MODE_DISABLE           (0 << 0) +#	define R300_GA_POLY_MODE_DUAL              (1 << 0) /* send 2 sets of 3 polys with specified poly type */ +/* reserved */ +#	define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) +#	define R300_GA_POLY_MODE_FRONT_PTYPE_LINE  (1 << 4) +#	define R300_GA_POLY_MODE_FRONT_PTYPE_TRI   (2 << 4) +/* reserved */ +#	define R300_GA_POLY_MODE_BACK_PTYPE_POINT  (0 << 7) +#	define R300_GA_POLY_MODE_BACK_PTYPE_LINE   (1 << 7) +#	define R300_GA_POLY_MODE_BACK_PTYPE_TRI    (2 << 7) +/* reserved */ + +/* Specifies the rouding mode for geometry & color SPFP to FP conversions. */ +#define R300_GA_ROUND_MODE                            0x428c +#	define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC   (0 << 0) +#	define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) +#	define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC      (0 << 2) +#	define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST    (1 << 2) +#	define R300_GA_ROUND_MODE_RGB_CLAMP_RGB          (0 << 4) +#	define R300_GA_ROUND_MODE_RGB_CLAMP_FP20         (1 << 4) +#	define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB        (0 << 5) +#	define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20       (1 << 5) +#	define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT    6 +#	define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK     0x000003c0 + +/* Specifies x & y offsets for vertex data after conversion to FP. + * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b + * subprecision). + */ +#define R300_GA_OFFSET                                0x4290 +#	define R300_GA_OFFSET_X_OFFSET_SHIFT 0 +#	define R300_GA_OFFSET_X_OFFSET_MASK  0x0000ffff +#	define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 +#	define R300_GA_OFFSET_Y_OFFSET_MASK  0xffff0000 + +/* Specifies the scale to apply to fog. */ +#define R300_GA_FOG_SCALE                     0x4294 +/* Specifies the offset to apply to fog. */ +#define R300_GA_FOG_OFFSET                    0x4298 +/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ +#define R300_GA_SOFT_RESET                    0x429c + +/* Not sure why there are duplicate of factor and constant values. + * My best guess so far is that there are seperate zbiases for test and write. + * Ordering might be wrong. + * Some of the tests indicate that fgl has a fallback implementation of zbias + * via pixel shaders. + */ +#define R300_SU_TEX_WRAP                      0x42A0 +#define R300_SU_POLY_OFFSET_FRONT_SCALE       0x42A4 +#define R300_SU_POLY_OFFSET_FRONT_OFFSET      0x42A8 +#define R300_SU_POLY_OFFSET_BACK_SCALE        0x42AC +#define R300_SU_POLY_OFFSET_BACK_OFFSET       0x42B0 + +/* This register needs to be set to (1<<1) for RV350 to correctly + * perform depth test (see --vb-triangles in r300_demo) + * Don't know about other chips. - Vladimir + * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + * My guess is that there are two bits for each zbias primitive + * (FILL, LINE, POINT). + *  One to enable depth test and one for depth write. + * Yet this doesnt explain why depth writes work ... 
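+ *
+ * Sketch matching the observation above: the value 3 seen with
+ * GL_POLYGON_OFFSET_FILL corresponds to enabling the offset for both faces:
+ *
+ *   uint32_t offset_enable = R300_FRONT_ENABLE | R300_BACK_ENABLE;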
+ */ +#define R300_SU_POLY_OFFSET_ENABLE	       0x42B4 +#	define R300_FRONT_ENABLE	       (1 << 0) +#	define R300_BACK_ENABLE 	       (1 << 1) +#	define R300_PARA_ENABLE 	       (1 << 2) + +#define R300_SU_CULL_MODE                      0x42B8 +#       define R300_CULL_FRONT                   (1 << 0) +#       define R300_CULL_BACK                    (1 << 1) +#       define R300_FRONT_FACE_CCW               (0 << 2) +#       define R300_FRONT_FACE_CW                (1 << 2) + +/* SU Depth Scale value */ +#define R300_SU_DEPTH_SCALE                 0x42c0 +/* SU Depth Offset value */ +#define R300_SU_DEPTH_OFFSET                0x42c4 + + +/* BEGIN: Rasterization / Interpolators - many guesses */ + +/* + * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends + * on the vertex program, *not* the fragment program) + */ +#define R300_RS_COUNT                      0x4300 +#       define R300_IT_COUNT_SHIFT               0 +#       define R300_IT_COUNT_MASK                0x0000007f +#       define R300_IC_COUNT_SHIFT               7 +#       define R300_IC_COUNT_MASK                0x00000780 +#       define R300_W_ADDR_SHIFT                 12 +#       define R300_W_ADDR_MASK                  0x0003f000 +#       define R300_HIRES_DIS                    (0 << 18) +#       define R300_HIRES_EN                     (1 << 18) + +#define R300_RS_INST_COUNT                       0x4304 +#       define R300_RS_INST_COUNT_SHIFT          0 +#       define R300_RS_INST_COUNT_MASK           0x0000000f +#       define R300_RS_TX_OFFSET_SHIFT           5 +#	define R300_RS_TX_OFFSET_MASK            0x000000e0 + +/* gap */ + +/* Only used for texture coordinates. + * Use the source field to route texture coordinate input from the + * vertex program to the desired interpolator. Note that the source + * field is relative to the outputs the vertex program *actually* + * writes. If a vertex program only writes texcoord[1], this will + * be source index 0. + * Set INTERP_USED on all interpolators that produce data used by + * the fragment program. INTERP_USED looks like a swizzling mask, + * but I haven't seen it used that way. + * + * Note: The _UNKNOWN constants are always set in their respective + * register. I don't know if this is necessary. 
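+ *
+ * Hedged sketch (ours): routing a two-component texture coordinate into
+ * interpolator 0 might select S and T from the incoming components and pad
+ * R and Q from the K0/K1 constant selects (assumed to be the usual 0.0 and
+ * 1.0 constants):
+ *
+ *   uint32_t rs_ip0 = R300_RS_TEX_PTR(0) |
+ *                     R300_RS_SEL_S(R300_RS_SEL_C0) |
+ *                     R300_RS_SEL_T(R300_RS_SEL_C1) |
+ *                     R300_RS_SEL_R(R300_RS_SEL_K0) |
+ *                     R300_RS_SEL_Q(R300_RS_SEL_K1);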
+ */ +#define R300_RS_IP_0				        0x4310 +#define R300_RS_IP_1				        0x4314 +#define R300_RS_IP_2				        0x4318 +#define R300_RS_IP_3				        0x431C +#       define R300_RS_INTERP_SRC_SHIFT          2 /* TODO: check for removal */ +#       define R300_RS_INTERP_SRC_MASK           (7 << 2) /* TODO: check for removal */ +#	define R300_RS_TEX_PTR(x)		        (x << 0) +#	define R300_RS_COL_PTR(x)		        (x << 6) +#	define R300_RS_COL_FMT(x)		        (x << 9) +#	define R300_RS_COL_FMT_RGBA		        0 +#	define R300_RS_COL_FMT_RGB0		        1 +#	define R300_RS_COL_FMT_RGB1		        2 +#	define R300_RS_COL_FMT_000A		        4 +#	define R300_RS_COL_FMT_0000		        5 +#	define R300_RS_COL_FMT_0001		        6 +#	define R300_RS_COL_FMT_111A		        8 +#	define R300_RS_COL_FMT_1110		        9 +#	define R300_RS_COL_FMT_1111		        10 +#	define R300_RS_SEL_S(x)		                (x << 13) +#	define R300_RS_SEL_T(x)		                (x << 16) +#	define R300_RS_SEL_R(x)		                (x << 19) +#	define R300_RS_SEL_Q(x)		                (x << 22) +#	define R300_RS_SEL_C0		                0 +#	define R300_RS_SEL_C1		                1 +#	define R300_RS_SEL_C2		                2 +#	define R300_RS_SEL_C3		                3 +#	define R300_RS_SEL_K0		                4 +#	define R300_RS_SEL_K1		                5 + + +/*  */ +#define R500_RS_INST_0					0x4320 +#define R500_RS_INST_1					0x4324 +#define R500_RS_INST_2					0x4328 +#define R500_RS_INST_3					0x432c +#define R500_RS_INST_4					0x4330 +#define R500_RS_INST_5					0x4334 +#define R500_RS_INST_6					0x4338 +#define R500_RS_INST_7					0x433c +#define R500_RS_INST_8					0x4340 +#define R500_RS_INST_9					0x4344 +#define R500_RS_INST_10					0x4348 +#define R500_RS_INST_11					0x434c +#define R500_RS_INST_12					0x4350 +#define R500_RS_INST_13					0x4354 +#define R500_RS_INST_14					0x4358 +#define R500_RS_INST_15					0x435c +#define R500_RS_INST_TEX_ID_SHIFT			0 +#define R500_RS_INST_TEX_CN_WRITE			(1 << 4) +#define R500_RS_INST_TEX_ADDR_SHIFT			5 +#define R500_RS_INST_COL_ID_SHIFT			12 +#define R500_RS_INST_COL_CN_NO_WRITE			(0 << 16) +#define R500_RS_INST_COL_CN_WRITE			(1 << 16) +#define R500_RS_INST_COL_CN_WRITE_FBUFFER		(2 << 16) +#define R500_RS_INST_COL_CN_WRITE_BACKFACE		(3 << 16) +#define R500_RS_INST_COL_ADDR_SHIFT			18 +#define R500_RS_INST_TEX_ADJ				(1 << 25) +#define R500_RS_INST_W_CN				(1 << 26) + +/* These DWORDs control how vertex data is routed into fragment program + * registers, after interpolators. 
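+ *
+ * Hypothetical sketch: writing interpolated texture coordinate 0 into the
+ * fragment program register at address fp_addr (fp_addr is a placeholder):
+ *
+ *   uint32_t rs_inst0 = R300_RS_INST_TEX_ID(0) |
+ *                       R300_RS_INST_TEX_CN_WRITE |
+ *                       (fp_addr << R300_RS_INST_TEX_ADDR_SHIFT);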
+ */
+#define R300_RS_INST_0                     0x4330
+#define R300_RS_INST_1                     0x4334
+#define R300_RS_INST_2                     0x4338
+#define R300_RS_INST_3                     0x433C
+#define R300_RS_INST_4                     0x4340
+#define R300_RS_INST_5                     0x4344
+#define R300_RS_INST_6                     0x4348
+#define R300_RS_INST_7                     0x434C
+#	define R300_RS_INST_TEX_ID(x)  		((x) << 0)
+#	define R300_RS_INST_TEX_CN_WRITE 	(1 << 3)
+#	define R300_RS_INST_TEX_ADDR_SHIFT 	6
+#	define R300_RS_INST_COL_ID(x)		((x) << 11)
+#	define R300_RS_INST_COL_CN_WRITE	(1 << 14)
+#	define R300_RS_INST_COL_ADDR_SHIFT	17
+#	define R300_RS_INST_TEX_ADJ		(1 << 22)
+#	define R300_RS_COL_BIAS_UNUSED_SHIFT    23
+
+/* END: Rasterization / Interpolators - many guesses */
+
+/* Hierarchical Z Enable */
+#define R300_SC_HYPERZ                   0x43a4
+#	define R300_SC_HYPERZ_DISABLE     (0 << 0)
+#	define R300_SC_HYPERZ_ENABLE      (1 << 0)
+#	define R300_SC_HYPERZ_MIN         (0 << 1)
+#	define R300_SC_HYPERZ_MAX         (1 << 1)
+#	define R300_SC_HYPERZ_ADJ_256     (0 << 2)
+#	define R300_SC_HYPERZ_ADJ_128     (1 << 2)
+#	define R300_SC_HYPERZ_ADJ_64      (2 << 2)
+#	define R300_SC_HYPERZ_ADJ_32      (3 << 2)
+#	define R300_SC_HYPERZ_ADJ_16      (4 << 2)
+#	define R300_SC_HYPERZ_ADJ_8       (5 << 2)
+#	define R300_SC_HYPERZ_ADJ_4       (6 << 2)
+#	define R300_SC_HYPERZ_ADJ_2       (7 << 2)
+#	define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5)
+#	define R300_SC_HYPERZ_HZ_Z0MIN    (1 << 5)
+#	define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6)
+#	define R300_SC_HYPERZ_HZ_Z0MAX    (1 << 6)
+
+#define R300_SC_EDGERULE                 0x43a8
+
+/* BEGIN: Scissors and cliprects */
+
+/* There are four clipping rectangles. Their corner coordinates are inclusive.
+ * Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
+ * on whether the pixel is inside cliprects 0-3, respectively. For example,
+ * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
+ * the number 3 (binary 0011).
+ * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set,
+ * the pixel is rasterized.
+ *
+ * In addition to this, there is a scissors rectangle. Only pixels inside the
+ * scissors rectangle are drawn. (coordinates are inclusive)
+ *
+ * For some reason, the top-left corner of the framebuffer is at (1440, 1440)
+ * for the purpose of clipping and scissors.
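+ *
+ * Worked example (editor's illustration): following the numbering above, a
+ * pixel inside cliprects 0 and 1 but outside 2 and 3 has number 3, so setting
+ * R300_CLIP_10 (1 << 3) in SC_CLIP_RULE rasterizes exactly those pixels;
+ * setting all sixteen bits (0xFFFF) accepts every pixel regardless of the
+ * cliprects. Because of the (1440, 1440) origin, a scissors rectangle
+ * covering x = 0..1023, y = 0..767 would presumably be programmed as
+ *   SC_SCISSORS_TL = (1440 << R300_SCISSORS_X_SHIFT) |
+ *                    (1440 << R300_SCISSORS_Y_SHIFT);
+ *   SC_SCISSORS_BR = ((1440 + 1023) << R300_SCISSORS_X_SHIFT) |
+ *                    ((1440 + 767) << R300_SCISSORS_Y_SHIFT);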
+ */ +#define R300_SC_CLIPRECT_TL_0               0x43B0 +#define R300_SC_CLIPRECT_BR_0               0x43B4 +#define R300_SC_CLIPRECT_TL_1               0x43B8 +#define R300_SC_CLIPRECT_BR_1               0x43BC +#define R300_SC_CLIPRECT_TL_2               0x43C0 +#define R300_SC_CLIPRECT_BR_2               0x43C4 +#define R300_SC_CLIPRECT_TL_3               0x43C8 +#define R300_SC_CLIPRECT_BR_3               0x43CC +#       define R300_CLIPRECT_OFFSET              1440 +#       define R300_CLIPRECT_MASK                0x1FFF +#       define R300_CLIPRECT_X_SHIFT             0 +#       define R300_CLIPRECT_X_MASK              (0x1FFF << 0) +#       define R300_CLIPRECT_Y_SHIFT             13 +#       define R300_CLIPRECT_Y_MASK              (0x1FFF << 13) +#define R300_SC_CLIP_RULE                   0x43D0 +#       define R300_CLIP_OUT                     (1 << 0) +#       define R300_CLIP_0                       (1 << 1) +#       define R300_CLIP_1                       (1 << 2) +#       define R300_CLIP_10                      (1 << 3) +#       define R300_CLIP_2                       (1 << 4) +#       define R300_CLIP_20                      (1 << 5) +#       define R300_CLIP_21                      (1 << 6) +#       define R300_CLIP_210                     (1 << 7) +#       define R300_CLIP_3                       (1 << 8) +#       define R300_CLIP_30                      (1 << 9) +#       define R300_CLIP_31                      (1 << 10) +#       define R300_CLIP_310                     (1 << 11) +#       define R300_CLIP_32                      (1 << 12) +#       define R300_CLIP_320                     (1 << 13) +#       define R300_CLIP_321                     (1 << 14) +#       define R300_CLIP_3210                    (1 << 15) + +/* gap */ + +#define R300_SC_SCISSORS_TL                 0x43E0 +#define R300_SC_SCISSORS_BR                 0x43E4 +#       define R300_SCISSORS_OFFSET              1440 +#       define R300_SCISSORS_X_SHIFT             0 +#       define R300_SCISSORS_X_MASK              (0x1FFF << 0) +#       define R300_SCISSORS_Y_SHIFT             13 +#       define R300_SCISSORS_Y_MASK              (0x1FFF << 13) + +/* Screen door sample mask */ +#define R300_SC_SCREENDOOR                 0x43e8 + +/* END: Scissors and cliprects */ + +/* BEGIN: Texture specification */ + +/* + * The texture specification dwords are grouped by meaning and not by texture + * unit. This means that e.g. 
the offset for texture image unit N is found in + * register TX_OFFSET_0 + (4*N) + */ +#define R300_TX_FILTER0_0                        0x4400 +#define R300_TX_FILTER0_1                        0x4404 +#define R300_TX_FILTER0_2                        0x4408 +#define R300_TX_FILTER0_3                        0x440c +#define R300_TX_FILTER0_4                        0x4410 +#define R300_TX_FILTER0_5                        0x4414 +#define R300_TX_FILTER0_6                        0x4418 +#define R300_TX_FILTER0_7                        0x441c +#define R300_TX_FILTER0_8                        0x4420 +#define R300_TX_FILTER0_9                        0x4424 +#define R300_TX_FILTER0_10                       0x4428 +#define R300_TX_FILTER0_11                       0x442c +#define R300_TX_FILTER0_12                       0x4430 +#define R300_TX_FILTER0_13                       0x4434 +#define R300_TX_FILTER0_14                       0x4438 +#define R300_TX_FILTER0_15                       0x443c +#       define R300_TX_REPEAT                    0 +#       define R300_TX_MIRRORED                  1 +#       define R300_TX_CLAMP_TO_EDGE             2 +#	define R300_TX_MIRROR_ONCE_TO_EDGE       3 +#       define R300_TX_CLAMP                     4 +#	define R300_TX_MIRROR_ONCE               5 +#       define R300_TX_CLAMP_TO_BORDER           6 +#	define R300_TX_MIRROR_ONCE_TO_BORDER     7 +#       define R300_TX_WRAP_S_SHIFT              0 +#       define R300_TX_WRAP_S_MASK               (7 << 0) +#       define R300_TX_WRAP_T_SHIFT              3 +#       define R300_TX_WRAP_T_MASK               (7 << 3) +#       define R300_TX_WRAP_R_SHIFT              6 +#       define R300_TX_WRAP_R_MASK               (7 << 6) +#	define R300_TX_MAG_FILTER_4              (0 << 9) +#       define R300_TX_MAG_FILTER_NEAREST        (1 << 9) +#       define R300_TX_MAG_FILTER_LINEAR         (2 << 9) +#       define R300_TX_MAG_FILTER_ANISO          (3 << 9) +#       define R300_TX_MAG_FILTER_MASK           (3 << 9) +#       define R300_TX_MIN_FILTER_NEAREST        (1 << 11) +#       define R300_TX_MIN_FILTER_LINEAR         (2 << 11) +#	define R300_TX_MIN_FILTER_ANISO          (3 << 11) +#	define R300_TX_MIN_FILTER_MASK           (3 << 11) +#	define R300_TX_MIN_FILTER_MIP_NONE       (0 << 13) +#	define R300_TX_MIN_FILTER_MIP_NEAREST    (1 << 13) +#	define R300_TX_MIN_FILTER_MIP_LINEAR     (2 << 13) +#	define R300_TX_MIN_FILTER_MIP_MASK       (3 << 13) +#	define R300_TX_MAX_ANISO_1_TO_1          (0 << 21) +#	define R300_TX_MAX_ANISO_2_TO_1          (1 << 21) +#	define R300_TX_MAX_ANISO_4_TO_1          (2 << 21) +#	define R300_TX_MAX_ANISO_8_TO_1          (3 << 21) +#	define R300_TX_MAX_ANISO_16_TO_1         (4 << 21) +#	define R300_TX_MAX_ANISO_MASK            (7 << 21) + +#define R300_TX_FILTER1_0                      0x4440 +#	define R300_CHROMA_KEY_MODE_DISABLE    0 +#	define R300_CHROMA_KEY_FORCE	       1 +#	define R300_CHROMA_KEY_BLEND           2 +#	define R300_MC_ROUND_NORMAL            (0<<2) +#	define R300_MC_ROUND_MPEG4             (1<<2) +#	define R300_LOD_BIAS_SHIFT             3 +#	define R300_LOD_BIAS_MASK	       0x1ff8 +#	define R300_EDGE_ANISO_EDGE_DIAG       (0<<13) +#	define R300_EDGE_ANISO_EDGE_ONLY       (1<<13) +#	define R300_MC_COORD_TRUNCATE_DISABLE  (0<<14) +#	define R300_MC_COORD_TRUNCATE_MPEG     (1<<14) +#	define R300_TX_TRI_PERF_0_8            (0<<15) +#	define R300_TX_TRI_PERF_1_8            (1<<15) +#	define R300_TX_TRI_PERF_1_4            (2<<15) +#	define R300_TX_TRI_PERF_3_8            
(3<<15) +#	define R300_ANISO_THRESHOLD_MASK       (7<<17) + +#	define R500_MACRO_SWITCH               (1<<22) +#	define R500_BORDER_FIX                 (1<<31) + +#define R300_TX_SIZE_0                      0x4480 +#       define R300_TX_WIDTHMASK_SHIFT           0 +#       define R300_TX_WIDTHMASK_MASK            (2047 << 0) +#       define R300_TX_HEIGHTMASK_SHIFT          11 +#       define R300_TX_HEIGHTMASK_MASK           (2047 << 11) +#	define R300_TX_DEPTHMASK_SHIFT		 22 +#	define R300_TX_DEPTHMASK_MASK		 (0xf << 22) +#       define R300_TX_MAX_MIP_LEVEL_SHIFT       26 +#       define R300_TX_MAX_MIP_LEVEL_MASK        (0xf << 26) +#       define R300_TX_SIZE_PROJECTED            (1<<30) +#       define R300_TX_SIZE_TXPITCH_EN           (1<<31) +#define R300_TX_FORMAT_0                    0x44C0 +	/* The interpretation of the format word by Wladimir van der Laan */ +	/* The X, Y, Z and W refer to the layout of the components. +	   They are given meanings as R, G, B and Alpha by the swizzle +	   specification */ +#	define R300_TX_FORMAT_X8		    0x0 +#	define R500_TX_FORMAT_X1		    0x0 // bit set in format 2 +#	define R300_TX_FORMAT_X16		    0x1 +#	define R500_TX_FORMAT_X1_REV		    0x0 // bit set in format 2 +#	define R300_TX_FORMAT_Y4X4		    0x2 +#	define R300_TX_FORMAT_Y8X8		    0x3 +#	define R300_TX_FORMAT_Y16X16		    0x4 +#	define R300_TX_FORMAT_Z3Y3X2		    0x5 +#	define R300_TX_FORMAT_Z5Y6X5		    0x6 +#	define R300_TX_FORMAT_Z6Y5X5		    0x7 +#	define R300_TX_FORMAT_Z11Y11X10		    0x8 +#	define R300_TX_FORMAT_Z10Y11X11		    0x9 +#	define R300_TX_FORMAT_W4Z4Y4X4		    0xA +#	define R300_TX_FORMAT_W1Z5Y5X5		    0xB +#	define R300_TX_FORMAT_W8Z8Y8X8		    0xC +#	define R300_TX_FORMAT_W2Z10Y10X10	    0xD +#	define R300_TX_FORMAT_W16Z16Y16X16	    0xE +#	define R300_TX_FORMAT_DXT1	    	    0xF +#	define R300_TX_FORMAT_DXT3	    	    0x10 +#	define R300_TX_FORMAT_DXT5	    	    0x11 +#	define R300_TX_FORMAT_D3DMFT_CxV8U8	    0x12     /* no swizzle */ +#	define R300_TX_FORMAT_A8R8G8B8	    	    0x13     /* no swizzle */ +#	define R300_TX_FORMAT_B8G8_B8G8	    	    0x14     /* no swizzle */ +#	define R300_TX_FORMAT_G8R8_G8B8	    	    0x15     /* no swizzle */ + +	/* These two values are wrong, but they're the only values that +	 * produce any even vaguely correct results.  Can r300 only do 16-bit +	 * depth textures? +	 */ +#	define R300_TX_FORMAT_X24_Y8	    	    0x1e +#	define R300_TX_FORMAT_X32	    	    0x1e + +	/* 0x16 - some 16 bit green format.. ?? 
*/ +#	define R300_TX_FORMAT_3D		   (1 << 25) +#	define R300_TX_FORMAT_CUBIC_MAP		   (2 << 25) + +	/* gap */ +	/* Floating point formats */ +	/* Note - hardware supports both 16 and 32 bit floating point */ +#	define R300_TX_FORMAT_FL_I16	    	    0x18 +#	define R300_TX_FORMAT_FL_I16A16	    	    0x19 +#	define R300_TX_FORMAT_FL_R16G16B16A16	    0x1A +#	define R300_TX_FORMAT_FL_I32	    	    0x1B +#	define R300_TX_FORMAT_FL_I32A32	    	    0x1C +#	define R300_TX_FORMAT_FL_R32G32B32A32	    0x1D +	/* alpha modes, convenience mostly */ +	/* if you have alpha, pick constant appropriate to the +	   number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ +# 	define R300_TX_FORMAT_ALPHA_1CH		    0x000 +# 	define R300_TX_FORMAT_ALPHA_2CH		    0x200 +# 	define R300_TX_FORMAT_ALPHA_4CH		    0x600 +# 	define R300_TX_FORMAT_ALPHA_NONE	    0xA00 +	/* Swizzling */ +	/* constants */ +#	define R300_TX_FORMAT_X		0 +#	define R300_TX_FORMAT_Y		1 +#	define R300_TX_FORMAT_Z		2 +#	define R300_TX_FORMAT_W		3 +#	define R300_TX_FORMAT_ZERO	4 +#	define R300_TX_FORMAT_ONE	5 +	/* 2.0*Z, everything above 1.0 is set to 0.0 */ +#	define R300_TX_FORMAT_CUT_Z	6 +	/* 2.0*W, everything above 1.0 is set to 0.0 */ +#	define R300_TX_FORMAT_CUT_W	7 + +#	define R300_TX_FORMAT_B_SHIFT	18 +#	define R300_TX_FORMAT_G_SHIFT	15 +#	define R300_TX_FORMAT_R_SHIFT	12 +#	define R300_TX_FORMAT_A_SHIFT	9 +	/* Convenience macro to take care of layout and swizzling */ +#	define R300_EASY_TX_FORMAT(B, G, R, A, FMT)	(		\ +		((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT)		\ +		| ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT)	\ +		| ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT)	\ +		| ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT)	\ +		| (R300_TX_FORMAT_##FMT)				\ +		) +	/* These can be ORed with result of R300_EASY_TX_FORMAT() +	   We don't really know what they do. Take values from a +           constant color ? 
*/ +#	define R300_TX_FORMAT_CONST_X		(1<<5) +#	define R300_TX_FORMAT_CONST_Y		(2<<5) +#	define R300_TX_FORMAT_CONST_Z		(4<<5) +#	define R300_TX_FORMAT_CONST_W		(8<<5) + +#	define R300_TX_FORMAT_YUV_MODE		0x00800000 + +#define R300_TX_FORMAT2_0		    0x4500 /* obvious missing in gap */ +#       define R300_TX_PITCHMASK_SHIFT           0 +#       define R300_TX_PITCHMASK_MASK            (2047 << 0) +#	define R500_TXFORMAT_MSB		 (1 << 14) +#	define R500_TXWIDTH_BIT11	         (1 << 15) +#	define R500_TXHEIGHT_BIT11	         (1 << 16) +#	define R500_POW2FIX2FLT			 (1 << 17) +#	define R500_SEL_FILTER4_TC0		 (0 << 18) +#	define R500_SEL_FILTER4_TC1		 (1 << 18) +#	define R500_SEL_FILTER4_TC2		 (2 << 18) +#	define R500_SEL_FILTER4_TC3		 (3 << 18) + +#define R300_TX_OFFSET_0                    0x4540 +#define R300_TX_OFFSET_1                    0x4544 +#define R300_TX_OFFSET_2                    0x4548 +#define R300_TX_OFFSET_3                    0x454C +#define R300_TX_OFFSET_4                    0x4550 +#define R300_TX_OFFSET_5                    0x4554 +#define R300_TX_OFFSET_6                    0x4558 +#define R300_TX_OFFSET_7                    0x455C +	/* BEGIN: Guess from R200 */ +#       define R300_TXO_ENDIAN_NO_SWAP           (0 << 0) +#       define R300_TXO_ENDIAN_BYTE_SWAP         (1 << 0) +#       define R300_TXO_ENDIAN_WORD_SWAP         (2 << 0) +#       define R300_TXO_ENDIAN_HALFDW_SWAP       (3 << 0) +#       define R300_TXO_MACRO_TILE               (1 << 2) +#       define R300_TXO_MICRO_TILE_LINEAR        (0 << 3) +#       define R300_TXO_MICRO_TILE               (1 << 3) +#       define R300_TXO_MICRO_TILE_SQUARE        (2 << 3) +#       define R300_TXO_OFFSET_MASK              0xffffffe0 +#       define R300_TXO_OFFSET_SHIFT             5 +	/* END: Guess from R200 */ + +/* 32 bit chroma key */ +#define R300_TX_CHROMA_KEY_0                      0x4580 +#define R300_TX_CHROMA_KEY_1                      0x4584 +#define R300_TX_CHROMA_KEY_2                      0x4588 +#define R300_TX_CHROMA_KEY_3                      0x458c +#define R300_TX_CHROMA_KEY_4                      0x4590 +#define R300_TX_CHROMA_KEY_5                      0x4594 +#define R300_TX_CHROMA_KEY_6                      0x4598 +#define R300_TX_CHROMA_KEY_7                      0x459c +#define R300_TX_CHROMA_KEY_8                      0x45a0 +#define R300_TX_CHROMA_KEY_9                      0x45a4 +#define R300_TX_CHROMA_KEY_10                     0x45a8 +#define R300_TX_CHROMA_KEY_11                     0x45ac +#define R300_TX_CHROMA_KEY_12                     0x45b0 +#define R300_TX_CHROMA_KEY_13                     0x45b4 +#define R300_TX_CHROMA_KEY_14                     0x45b8 +#define R300_TX_CHROMA_KEY_15                     0x45bc +/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ + +/* Border Color */ +#define R300_TX_BORDER_COLOR_0              0x45c0 +#define R300_TX_BORDER_COLOR_1              0x45c4 +#define R300_TX_BORDER_COLOR_2              0x45c8 +#define R300_TX_BORDER_COLOR_3              0x45cc +#define R300_TX_BORDER_COLOR_4              0x45d0 +#define R300_TX_BORDER_COLOR_5              0x45d4 +#define R300_TX_BORDER_COLOR_6              0x45d8 +#define R300_TX_BORDER_COLOR_7              0x45dc +#define R300_TX_BORDER_COLOR_8              0x45e0 +#define R300_TX_BORDER_COLOR_9              0x45e4 +#define R300_TX_BORDER_COLOR_10             0x45e8 +#define R300_TX_BORDER_COLOR_11             0x45ec +#define R300_TX_BORDER_COLOR_12             0x45f0 +#define R300_TX_BORDER_COLOR_13             0x45f4 
+#define R300_TX_BORDER_COLOR_14             0x45f8
+#define R300_TX_BORDER_COLOR_15             0x45fc
+
+
+/* END: Texture specification */
+
+/* BEGIN: Fragment program instruction set */
+
+/* Fragment programs are written directly into register space.
+ * There are separate instruction streams for texture instructions and ALU
+ * instructions.
+ * In order to synchronize these streams, the program is divided into up
+ * to 4 nodes. Each node begins with a number of TEX operations, followed
+ * by a number of ALU operations.
+ * The first node can have zero TEX ops; all subsequent nodes must have at
+ * least one TEX op.
+ * All nodes must have at least one ALU op.
+ *
+ * The index of the last node is stored in PFS_CNTL_0: A value of 0 means
+ * 1 node, a value of 3 means 4 nodes.
+ * The total number of instructions is defined in PFS_CNTL_2. The offsets are
+ * offsets into the respective instruction streams, while *_END points to the
+ * last instruction relative to this offset.
+ */
+#define R300_US_CONFIG                      0x4600
+#       define R300_PFS_CNTL_LAST_NODES_SHIFT    0
+#       define R300_PFS_CNTL_LAST_NODES_MASK     (3 << 0)
+#       define R300_PFS_CNTL_FIRST_NODE_HAS_TEX  (1 << 3)
+#define R300_US_PIXSIZE                     0x4604
+/* There is an unshifted value here which has so far always been equal to the
+ * index of the highest used temporary register.
+ */
+#define R300_US_CODE_OFFSET                 0x4608
+#       define R300_PFS_CNTL_ALU_OFFSET_SHIFT    0
+#       define R300_PFS_CNTL_ALU_OFFSET_MASK     (63 << 0)
+#       define R300_PFS_CNTL_ALU_END_SHIFT       6
+#       define R300_PFS_CNTL_ALU_END_MASK        (63 << 6)
+#       define R300_PFS_CNTL_TEX_OFFSET_SHIFT    13
+#       define R300_PFS_CNTL_TEX_OFFSET_MASK     (31 << 13)
+#       define R300_PFS_CNTL_TEX_END_SHIFT       18
+#       define R300_PFS_CNTL_TEX_END_MASK        (31 << 18)
+
+/* gap */
+
+/* Nodes are stored backwards. The last active node is always stored in
+ * PFS_NODE_3.
+ * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
+ * first node is stored in NODE_2, the second node is stored in NODE_3.
+ *
+ * Offsets are relative to the master offset from PFS_CNTL_2.
+ */
+#define R300_US_CODE_ADDR_0                 0x4610
+#define R300_US_CODE_ADDR_1                 0x4614
+#define R300_US_CODE_ADDR_2                 0x4618
+#define R300_US_CODE_ADDR_3                 0x461C
+#       define R300_ALU_START_SHIFT         0
+#       define R300_ALU_START_MASK          (63 << 0)
+#       define R300_ALU_SIZE_SHIFT          6
+#       define R300_ALU_SIZE_MASK           (63 << 6)
+#       define R300_TEX_START_SHIFT         12
+#       define R300_TEX_START_MASK          (31 << 12)
+#       define R300_TEX_SIZE_SHIFT          17
+#       define R300_TEX_SIZE_MASK           (31 << 17)
+#	define R300_RGBA_OUT                (1 << 22)
+#	define R300_W_OUT                   (1 << 23)
+
+/* TEX
+ * As far as I can tell, texture instructions cannot write into output
+ * registers directly. A subsequent ALU instruction is always necessary,
+ * even if it's just MAD o0, r0, 1, 0
+ */
+#define R300_US_TEX_INST_0                  0x4620
+#	define R300_SRC_ADDR_SHIFT          0
+#	define R300_SRC_ADDR_MASK           (31 << 0)
+#	define R300_DST_ADDR_SHIFT          6
+#	define R300_DST_ADDR_MASK           (31 << 6)
+#	define R300_TEX_ID_SHIFT            11
+#       define R300_TEX_ID_MASK             (15 << 11)
+#	define R300_TEX_INST_SHIFT		15
+#		define R300_TEX_OP_NOP	        0
+#		define R300_TEX_OP_LD	        1
+#		define R300_TEX_OP_KIL	        2
+#		define R300_TEX_OP_TXP	        3
+#		define R300_TEX_OP_TXB	        4
+#	define R300_TEX_INST_MASK               (7 << 15)
+
+/* Output format from the unified shader */
+#define R300_US_OUT_FMT_0                   0x46A4
+#	define R300_US_OUT_FMT_C4_8         (0 << 0)
+#	define R300_US_OUT_FMT_C4_10        (1 << 0)
+#	define R300_US_OUT_FMT_C4_10_GAMMA  (2 << 0)
+#	define R300_US_OUT_FMT_C_16         (3 << 0)
+#	define R300_US_OUT_FMT_C2_16        (4 << 0)
+#	define R300_US_OUT_FMT_C4_16        (5 << 0)
+#	define R300_US_OUT_FMT_C_16_MPEG    (6 << 0)
+#	define R300_US_OUT_FMT_C2_16_MPEG   (7 << 0)
+#	define R300_US_OUT_FMT_C2_4         (8 << 0)
+#	define R300_US_OUT_FMT_C_3_3_2      (9 << 0)
+#	define R300_US_OUT_FMT_C_6_5_6      (10 << 0)
+#	define R300_US_OUT_FMT_C_11_11_10   (11 << 0)
+#	define R300_US_OUT_FMT_C_10_11_11   (12 << 0)
+#	define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* reserved */
+#	define R300_US_OUT_FMT_UNUSED       (15 << 0)
+#	define R300_US_OUT_FMT_C_16_FP      (16 << 0)
+#	define R300_US_OUT_FMT_C2_16_FP     (17 << 0)
+#	define R300_US_OUT_FMT_C4_16_FP     (18 << 0)
+#	define R300_US_OUT_FMT_C_32_FP      (19 << 0)
+#	define R300_US_OUT_FMT_C2_32_FP     (20 << 0)
+#	define R300_US_OUT_FMT_C4_32_FP     (21 << 0)
+#   define R300_C0_SEL_A				(0 << 8)
+#   define R300_C0_SEL_R				(1 << 8)
+#   define R300_C0_SEL_G				(2 << 8)
+#   define R300_C0_SEL_B				(3 << 8)
+#   define R300_C1_SEL_A				(0 << 10)
+#   define R300_C1_SEL_R				(1 << 10)
+#   define R300_C1_SEL_G				(2 << 10)
+#   define R300_C1_SEL_B				(3 << 10)
+#   define R300_C2_SEL_A				(0 << 12)
+#   define R300_C2_SEL_R				(1 << 12)
+#   define R300_C2_SEL_G				(2 << 12)
+#   define R300_C2_SEL_B				(3 << 12)
+#   define R300_C3_SEL_A				(0 << 14)
+#   define R300_C3_SEL_R				(1 << 14)
+#   define R300_C3_SEL_G				(2 << 14)
+#   define R300_C3_SEL_B				(3 << 14)
+#   define R300_OUT_SIGN(x)				(x << 16)
+
+/* ALU
+ * The ALU instructions register blocks are enumerated according to the order
+ * in which fglrx writes them. I assume there is space for 64 instructions,
+ * since each block has space for a maximum of 64 DWORDs, and this matches
+ * reported native limits.
+ *
+ * The basic functional block seems to be one MAD for each color and alpha,
+ * and an adder that adds all components after the MUL.
+ *  - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
+ *  - DP4: Use OUTC_DP4, OUTA_DP4
+ *  - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
+ *  - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
+ *  - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1
+ *  - CMP: If ARG2 < 0, return ARG1, else return ARG0
+ *  - FLR: use FRC+MAD
+ *  - XPD: use MAD+MAD
+ *  - SGE, SLT: use MAD+CMP
+ *  - RSQ: use ABS modifier for argument
+ *  - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
+ *    (e.g. RCP) into color register
+ *  - apparently, there's no quick DST operation
+ *  - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
+ *  - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
+ *  - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
+ *
+ * Operand selection
+ * First stage selects three sources from the available registers and
+ * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
+ * fglrx sorts the three source fields: Registers before constants,
+ * lower indices before higher indices; I do not know whether this is
+ * necessary.
+ *
+ * fglrx fills unused sources with "read constant 0"
+ * According to specs, you cannot select more than two different constants.
+ *
+ * Second stage selects the operands from the sources. This is defined in
+ * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
+ * zero and one.
+ * Swizzling and negation happen in this stage, as well.
+ *
+ * Important: Color and alpha seem to be mostly separate, i.e. their source
+ * selection appears to be fully independent (the register storage is probably
+ * physically split into a color and an alpha section).
+ * However (because of the apparent physical split), there is some interaction
+ * WRT swizzling. If, for example, you want to load an R component into an
+ * Alpha operand, this R component is taken from a *color* source, not from
+ * an alpha source. The corresponding register doesn't even have to appear in
+ * the alpha sources list. (I hope this all makes sense to you)
+ *
+ * Destination selection
+ * The destination register index is in FPI1 (color) and FPI3 (alpha)
+ * together with enable bits.
+ * There are separate enable bits for writing into temporary registers
+ * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
+ * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
+ * same index must be used for both).
+ *
+ * Note: There is a special form for LRP
+ *  - Argument order is the same as in ARB_fragment_program.
+ *  - Operation is MAD + *  - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP + *  - Set FPI0/FPI2_SPECIAL_LRP + * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD + */ +#define R300_US_ALU_RGB_ADDR_0                   0x46C0 +#       define R300_ALU_SRC0C_SHIFT             0 +#       define R300_ALU_SRC0C_MASK              (31 << 0) +#       define R300_ALU_SRC0C_CONST             (1 << 5) +#       define R300_ALU_SRC1C_SHIFT             6 +#       define R300_ALU_SRC1C_MASK              (31 << 6) +#       define R300_ALU_SRC1C_CONST             (1 << 11) +#       define R300_ALU_SRC2C_SHIFT             12 +#       define R300_ALU_SRC2C_MASK              (31 << 12) +#       define R300_ALU_SRC2C_CONST             (1 << 17) +#       define R300_ALU_SRC_MASK                0x0003ffff +#       define R300_ALU_DSTC_SHIFT              18 +#       define R300_ALU_DSTC_MASK               (31 << 18) +#		define R300_ALU_DSTC_REG_MASK_SHIFT     23 +#       define R300_ALU_DSTC_REG_X              (1 << 23) +#       define R300_ALU_DSTC_REG_Y              (1 << 24) +#       define R300_ALU_DSTC_REG_Z              (1 << 25) +#		define R300_ALU_DSTC_OUTPUT_MASK_SHIFT  26 +#       define R300_ALU_DSTC_OUTPUT_X           (1 << 26) +#       define R300_ALU_DSTC_OUTPUT_Y           (1 << 27) +#       define R300_ALU_DSTC_OUTPUT_Z           (1 << 28) + +#define R300_US_ALU_ALPHA_ADDR_0                 0x47C0 +#       define R300_ALU_SRC0A_SHIFT             0 +#       define R300_ALU_SRC0A_MASK              (31 << 0) +#       define R300_ALU_SRC0A_CONST             (1 << 5) +#       define R300_ALU_SRC1A_SHIFT             6 +#       define R300_ALU_SRC1A_MASK              (31 << 6) +#       define R300_ALU_SRC1A_CONST             (1 << 11) +#       define R300_ALU_SRC2A_SHIFT             12 +#       define R300_ALU_SRC2A_MASK              (31 << 12) +#       define R300_ALU_SRC2A_CONST             (1 << 17) +#       define R300_ALU_SRC_MASK                0x0003ffff +#       define R300_ALU_DSTA_SHIFT              18 +#       define R300_ALU_DSTA_MASK               (31 << 18) +#       define R300_ALU_DSTA_REG                (1 << 23) +#       define R300_ALU_DSTA_OUTPUT             (1 << 24) +#		define R300_ALU_DSTA_DEPTH              (1 << 27) + +#define R300_US_ALU_RGB_INST_0                   0x48C0 +#       define R300_ALU_ARGC_SRC0C_XYZ          0 +#       define R300_ALU_ARGC_SRC0C_XXX          1 +#       define R300_ALU_ARGC_SRC0C_YYY          2 +#       define R300_ALU_ARGC_SRC0C_ZZZ          3 +#       define R300_ALU_ARGC_SRC1C_XYZ          4 +#       define R300_ALU_ARGC_SRC1C_XXX          5 +#       define R300_ALU_ARGC_SRC1C_YYY          6 +#       define R300_ALU_ARGC_SRC1C_ZZZ          7 +#       define R300_ALU_ARGC_SRC2C_XYZ          8 +#       define R300_ALU_ARGC_SRC2C_XXX          9 +#       define R300_ALU_ARGC_SRC2C_YYY          10 +#       define R300_ALU_ARGC_SRC2C_ZZZ          11 +#       define R300_ALU_ARGC_SRC0A              12 +#       define R300_ALU_ARGC_SRC1A              13 +#       define R300_ALU_ARGC_SRC2A              14 +#       define R300_ALU_ARGC_SRCP_XYZ           15 +#       define R300_ALU_ARGC_SRCP_XXX           16 +#       define R300_ALU_ARGC_SRCP_YYY           17 +#       define R300_ALU_ARGC_SRCP_ZZZ           18 +#       define R300_ALU_ARGC_SRCP_WWW           19 +#       define R300_ALU_ARGC_ZERO               20 +#       define R300_ALU_ARGC_ONE                21 +#       define R300_ALU_ARGC_HALF               22 +#       define 
R300_ALU_ARGC_SRC0C_YZX          23 +#       define R300_ALU_ARGC_SRC1C_YZX          24 +#       define R300_ALU_ARGC_SRC2C_YZX          25 +#       define R300_ALU_ARGC_SRC0C_ZXY          26 +#       define R300_ALU_ARGC_SRC1C_ZXY          27 +#       define R300_ALU_ARGC_SRC2C_ZXY          28 +#       define R300_ALU_ARGC_SRC0CA_WZY         29 +#       define R300_ALU_ARGC_SRC1CA_WZY         30 +#       define R300_ALU_ARGC_SRC2CA_WZY         31 + +#       define R300_ALU_ARG0C_SHIFT             0 +#       define R300_ALU_ARG0C_MASK              (31 << 0) +#       define R300_ALU_ARG0C_NOP               (0 << 5) +#       define R300_ALU_ARG0C_NEG               (1 << 5) +#       define R300_ALU_ARG0C_ABS               (2 << 5) +#       define R300_ALU_ARG0C_NAB               (3 << 5) +#       define R300_ALU_ARG1C_SHIFT             7 +#       define R300_ALU_ARG1C_MASK              (31 << 7) +#       define R300_ALU_ARG1C_NOP               (0 << 12) +#       define R300_ALU_ARG1C_NEG               (1 << 12) +#       define R300_ALU_ARG1C_ABS               (2 << 12) +#       define R300_ALU_ARG1C_NAB               (3 << 12) +#       define R300_ALU_ARG2C_SHIFT             14 +#       define R300_ALU_ARG2C_MASK              (31 << 14) +#       define R300_ALU_ARG2C_NOP               (0 << 19) +#       define R300_ALU_ARG2C_NEG               (1 << 19) +#       define R300_ALU_ARG2C_ABS               (2 << 19) +#       define R300_ALU_ARG2C_NAB               (3 << 19) +#       define R300_ALU_SRCP_1_MINUS_2_SRC0     (0 << 21) +#       define R300_ALU_SRCP_SRC1_MINUS_SRC0    (1 << 21) +#       define R300_ALU_SRCP_SRC1_PLUS_SRC0     (2 << 21) +#       define R300_ALU_SRCP_1_MINUS_SRC0       (3 << 21) + +#       define R300_ALU_OUTC_MAD                (0 << 23) +#       define R300_ALU_OUTC_DP3                (1 << 23) +#       define R300_ALU_OUTC_DP4                (2 << 23) +#       define R300_ALU_OUTC_D2A                (3 << 23) +#       define R300_ALU_OUTC_MIN                (4 << 23) +#       define R300_ALU_OUTC_MAX                (5 << 23) +#       define R300_ALU_OUTC_CMPH               (7 << 23) +#       define R300_ALU_OUTC_CMP                (8 << 23) +#       define R300_ALU_OUTC_FRC                (9 << 23) +#       define R300_ALU_OUTC_REPL_ALPHA         (10 << 23) + +#       define R300_ALU_OUTC_MOD_NOP            (0 << 27) +#       define R300_ALU_OUTC_MOD_MUL2           (1 << 27) +#       define R300_ALU_OUTC_MOD_MUL4           (2 << 27) +#       define R300_ALU_OUTC_MOD_MUL8           (3 << 27) +#       define R300_ALU_OUTC_MOD_DIV2           (4 << 27) +#       define R300_ALU_OUTC_MOD_DIV4           (5 << 27) +#       define R300_ALU_OUTC_MOD_DIV8           (6 << 27) + +#       define R300_ALU_OUTC_CLAMP              (1 << 30) +#       define R300_ALU_INSERT_NOP              (1 << 31) + +#define R300_US_ALU_ALPHA_INST_0                 0x49C0 +#       define R300_ALU_ARGA_SRC0C_X            0 +#       define R300_ALU_ARGA_SRC0C_Y            1 +#       define R300_ALU_ARGA_SRC0C_Z            2 +#       define R300_ALU_ARGA_SRC1C_X            3 +#       define R300_ALU_ARGA_SRC1C_Y            4 +#       define R300_ALU_ARGA_SRC1C_Z            5 +#       define R300_ALU_ARGA_SRC2C_X            6 +#       define R300_ALU_ARGA_SRC2C_Y            7 +#       define R300_ALU_ARGA_SRC2C_Z            8 +#       define R300_ALU_ARGA_SRC0A              9 +#       define R300_ALU_ARGA_SRC1A              10 +#       define R300_ALU_ARGA_SRC2A              11 +#       define 
R300_ALU_ARGA_SRCP_X             12 +#       define R300_ALU_ARGA_SRCP_Y             13 +#       define R300_ALU_ARGA_SRCP_Z             14 +#       define R300_ALU_ARGA_SRCP_W             15 + +#       define R300_ALU_ARGA_ZERO               16 +#       define R300_ALU_ARGA_ONE                17 +#       define R300_ALU_ARGA_HALF               18 +#       define R300_ALU_ARG0A_SHIFT             0 +#       define R300_ALU_ARG0A_MASK              (31 << 0) +#       define R300_ALU_ARG0A_NOP               (0 << 5) +#       define R300_ALU_ARG0A_NEG               (1 << 5) +#	define R300_ALU_ARG0A_ABS		 (2 << 5) +#	define R300_ALU_ARG0A_NAB		 (3 << 5) +#       define R300_ALU_ARG1A_SHIFT             7 +#       define R300_ALU_ARG1A_MASK              (31 << 7) +#       define R300_ALU_ARG1A_NOP               (0 << 12) +#       define R300_ALU_ARG1A_NEG               (1 << 12) +#	define R300_ALU_ARG1A_ABS		 (2 << 12) +#	define R300_ALU_ARG1A_NAB		 (3 << 12) +#       define R300_ALU_ARG2A_SHIFT             14 +#       define R300_ALU_ARG2A_MASK              (31 << 14) +#       define R300_ALU_ARG2A_NOP               (0 << 19) +#       define R300_ALU_ARG2A_NEG               (1 << 19) +#	define R300_ALU_ARG2A_ABS		 (2 << 19) +#	define R300_ALU_ARG2A_NAB		 (3 << 19) +#       define R300_ALU_SRCP_1_MINUS_2_SRC0     (0 << 21) +#       define R300_ALU_SRCP_SRC1_MINUS_SRC0    (1 << 21) +#       define R300_ALU_SRCP_SRC1_PLUS_SRC0     (2 << 21) +#       define R300_ALU_SRCP_1_MINUS_SRC0       (3 << 21) + +#       define R300_ALU_OUTA_MAD                (0 << 23) +#       define R300_ALU_OUTA_DP4                (1 << 23) +#       define R300_ALU_OUTA_MIN                (2 << 23) +#       define R300_ALU_OUTA_MAX                (3 << 23) +#       define R300_ALU_OUTA_CND                (5 << 23) +#       define R300_ALU_OUTA_CMP                (6 << 23) +#       define R300_ALU_OUTA_FRC                (7 << 23) +#       define R300_ALU_OUTA_EX2                (8 << 23) +#       define R300_ALU_OUTA_LG2                (9 << 23) +#       define R300_ALU_OUTA_RCP                (10 << 23) +#       define R300_ALU_OUTA_RSQ                (11 << 23) + +#       define R300_ALU_OUTA_MOD_NOP            (0 << 27) +#       define R300_ALU_OUTA_MOD_MUL2           (1 << 27) +#       define R300_ALU_OUTA_MOD_MUL4           (2 << 27) +#       define R300_ALU_OUTA_MOD_MUL8           (3 << 27) +#       define R300_ALU_OUTA_MOD_DIV2           (4 << 27) +#       define R300_ALU_OUTA_MOD_DIV4           (5 << 27) +#       define R300_ALU_OUTA_MOD_DIV8           (6 << 27) + +#       define R300_ALU_OUTA_CLAMP              (1 << 30) +/* END: Fragment program instruction set */ + +/* Fog: Fog Blending Enable */ +#define R300_FG_FOG_BLEND                             0x4bc0 +#       define R300_FG_FOG_BLEND_DISABLE              (0 << 0) +#       define R300_FG_FOG_BLEND_ENABLE               (1 << 0) +#	define R300_FG_FOG_BLEND_FN_LINEAR            (0 << 1) +#	define R300_FG_FOG_BLEND_FN_EXP               (1 << 1) +#	define R300_FG_FOG_BLEND_FN_EXP2              (2 << 1) +#	define R300_FG_FOG_BLEND_FN_CONSTANT          (3 << 1) +#	define R300_FG_FOG_BLEND_FN_MASK              (3 << 1) + +/* Fog: Red Component of Fog Color */ +#define R300_FG_FOG_COLOR_R                           0x4bc8 +/* Fog: Green Component of Fog Color */ +#define R300_FG_FOG_COLOR_G                           0x4bcc +/* Fog: Blue Component of Fog Color */ +#define R300_FG_FOG_COLOR_B                           0x4bd0 +#	define R300_FG_FOG_COLOR_MASK 0x000003ff 
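+
+/* Editor's note (illustration only, not from the original documentation):
+ * classic linear fog would presumably enable
+ *   FG_FOG_BLEND = R300_FG_FOG_BLEND_ENABLE | R300_FG_FOG_BLEND_FN_LINEAR;
+ * with the fog color split over the three registers above, one 10-bit
+ * component each (0x3ff appearing to correspond to full intensity).
+ */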
+ +/* Fog: Constant Factor for Fog Blending */ +#define R300_FG_FOG_FACTOR                            0x4bc4 +#	define FG_FOG_FACTOR_MASK 0x000003ff + +/* Fog: Alpha function */ +#define R300_FG_ALPHA_FUNC                            0x4bd4 +#       define R300_FG_ALPHA_FUNC_VAL_MASK               0x000000ff +#       define R300_FG_ALPHA_FUNC_NEVER                     (0 << 8) +#       define R300_FG_ALPHA_FUNC_LESS                      (1 << 8) +#       define R300_FG_ALPHA_FUNC_EQUAL                     (2 << 8) +#       define R300_FG_ALPHA_FUNC_LE                        (3 << 8) +#       define R300_FG_ALPHA_FUNC_GREATER                   (4 << 8) +#       define R300_FG_ALPHA_FUNC_NOTEQUAL                  (5 << 8) +#       define R300_FG_ALPHA_FUNC_GE                        (6 << 8) +#       define R300_FG_ALPHA_FUNC_ALWAYS                    (7 << 8) +#       define R300_ALPHA_TEST_OP_MASK                      (7 << 8) +#       define R300_FG_ALPHA_FUNC_DISABLE                   (0 << 11) +#       define R300_FG_ALPHA_FUNC_ENABLE                    (1 << 11) + +#       define R500_FG_ALPHA_FUNC_10BIT                     (0 << 12) +#       define R500_FG_ALPHA_FUNC_8BIT                      (1 << 12) + +#       define R300_FG_ALPHA_FUNC_MASK_DISABLE              (0 << 16) +#       define R300_FG_ALPHA_FUNC_MASK_ENABLE               (1 << 16) +#       define R300_FG_ALPHA_FUNC_CFG_2_OF_4                (0 << 17) +#       define R300_FG_ALPHA_FUNC_CFG_3_OF_6                (1 << 17) + +#       define R300_FG_ALPHA_FUNC_DITH_DISABLE              (0 << 20) +#       define R300_FG_ALPHA_FUNC_DITH_ENABLE               (1 << 20) + +#       define R500_FG_ALPHA_FUNC_OFFSET_DISABLE            (0 << 24) +#       define R500_FG_ALPHA_FUNC_OFFSET_ENABLE             (1 << 24) /* Not supported in R520 */ +#       define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE    (0 << 25) +#       define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE     (1 << 25) + +#       define R500_FG_ALPHA_FUNC_FP16_DISABLE              (0 << 28) +#       define R500_FG_ALPHA_FUNC_FP16_ENABLE               (1 << 28) + + +/* Fog: Where does the depth come from? */ +#define R300_FG_DEPTH_SRC                  0x4bd8 +#	define R300_FG_DEPTH_SRC_SCAN   (0 << 0) +#	define R300_FG_DEPTH_SRC_SHADER (1 << 0) + +/* Fog: Alpha Compare Value */ +#define R500_FG_ALPHA_VALUE                0x4be0 +#	define R500_FG_ALPHA_VALUE_MASK 0x0000ffff + +/* gap */ + +/* Fragment program parameters in 7.16 floating point */ +#define R300_PFS_PARAM_0_X                  0x4C00 +#define R300_PFS_PARAM_0_Y                  0x4C04 +#define R300_PFS_PARAM_0_Z                  0x4C08 +#define R300_PFS_PARAM_0_W                  0x4C0C +/* last consts */ +#define R300_PFS_PARAM_31_X                 0x4DF0 +#define R300_PFS_PARAM_31_Y                 0x4DF4 +#define R300_PFS_PARAM_31_Z                 0x4DF8 +#define R300_PFS_PARAM_31_W                 0x4DFC + +/* Unpipelined. 
*/ +#define R300_RB3D_CCTL                      0x4e00 +#	define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER                (0 << 5) +#	define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS               (1 << 5) +#	define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS               (2 << 5) +#	define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS               (3 << 5) +#	define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE                    (0 << 7) +#	define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE                     (1 << 7) +#	define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE                  (0 << 9) +#	define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE                   (1 << 9) +#	define R300_RB3D_CCTL_CMASK_DISABLE                           (0 << 10) +#	define R300_RB3D_CCTL_CMASK_ENABLE                            (1 << 10) +/* reserved */ +#	define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE  (0 << 12) +#	define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE   (1 << 12) +#	define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE                (0 << 13) +#	define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE               (1 << 13) +#	define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE  (0 << 14) +#	define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE   (1 << 14) + + +/* Notes: + * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in + *   the application + * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND + *    are set to the same + *   function (both registers are always set up completely in any case) + * - Most blend flags are simply copied from R200 and not tested yet + */ +#define R300_RB3D_CBLEND                    0x4E04 +#define R300_RB3D_ABLEND                    0x4E08 +/* the following only appear in CBLEND */ +#       define R300_ALPHA_BLEND_ENABLE         (1 << 0) +#       define R300_SEPARATE_ALPHA_ENABLE      (1 << 1) +#       define R300_READ_ENABLE                (1 << 2) +#       define R300_DISCARD_SRC_PIXELS_DIS     (0 << 3) +#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0     (1 << 3) +#       define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0     (2 << 3) +#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0     (3 << 3) +#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1     (4 << 3) +#       define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1     (5 << 3) +#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1     (6 << 3) + +/* the following are shared between CBLEND and ABLEND */ +#       define R300_FCN_MASK                         (3  << 12) +#       define R300_COMB_FCN_ADD_CLAMP               (0  << 12) +#       define R300_COMB_FCN_ADD_NOCLAMP             (1  << 12) +#       define R300_COMB_FCN_SUB_CLAMP               (2  << 12) +#       define R300_COMB_FCN_SUB_NOCLAMP             (3  << 12) +#       define R300_COMB_FCN_MIN                     (4  << 12) +#       define R300_COMB_FCN_MAX                     (5  << 12) +#       define R300_COMB_FCN_RSUB_CLAMP              (6  << 12) +#       define R300_COMB_FCN_RSUB_NOCLAMP            (7  << 12) +#       define R300_BLEND_GL_ZERO                    (32) +#       define R300_BLEND_GL_ONE                     (33) +#       define R300_BLEND_GL_SRC_COLOR               (34) +#       define R300_BLEND_GL_ONE_MINUS_SRC_COLOR     (35) +#       define R300_BLEND_GL_DST_COLOR               (36) +#       define R300_BLEND_GL_ONE_MINUS_DST_COLOR     (37) +#       define R300_BLEND_GL_SRC_ALPHA               (38) +#       define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA     (39) +#       define R300_BLEND_GL_DST_ALPHA       
        (40) +#       define R300_BLEND_GL_ONE_MINUS_DST_ALPHA     (41) +#       define R300_BLEND_GL_SRC_ALPHA_SATURATE      (42) +#       define R300_BLEND_GL_CONST_COLOR             (43) +#       define R300_BLEND_GL_ONE_MINUS_CONST_COLOR   (44) +#       define R300_BLEND_GL_CONST_ALPHA             (45) +#       define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA   (46) +#       define R300_BLEND_MASK                       (63) +#       define R300_SRC_BLEND_SHIFT                  (16) +#       define R300_DST_BLEND_SHIFT                  (24) + +/* Constant color used by the blender. Pipelined through the blender. + * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE, + * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead. + */ +#define R300_RB3D_BLEND_COLOR               0x4E10 + + +/* 3D Color Channel Mask. If all the channels used in the current color format + * are disabled, then the cb will discard all the incoming quads. Pipelined + * through the blender. + */ +#define RB3D_COLOR_CHANNEL_MASK                  0x4E0C +#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0  (1 << 0) +#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1) +#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK0   (1 << 2) +#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3) +#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1  (1 << 4) +#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5) +#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK1   (1 << 6) +#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7) +#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2  (1 << 8) +#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9) +#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK2   (1 << 10) +#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11) +#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3  (1 << 12) +#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13) +#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK3   (1 << 14) +#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15) + +/* Clear color that is used when the color mask is set to 00. Unpipelined. + * Program this register with a 32-bit value in ARGB8888 or ARGB2101010 + * formats, ignoring the fields. + */ +#define RB3D_COLOR_CLEAR_VALUE                   0x4e14 + +/* gap */ + +/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_CLR                     0x4e20 + +/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_MSK                     0x4e24 + +/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */ +#define R300_RB3D_COLOROFFSET0              0x4E28 +#       define R300_COLOROFFSET_MASK             0xFFFFFFE0 +/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */ +#define R300_RB3D_COLOROFFSET1              0x4E2C +/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ +#define R300_RB3D_COLOROFFSET2              0x4E30 +/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */ +#define R300_RB3D_COLOROFFSET3              0x4E34 + +/* Color buffer format and tiling control for all the multibuffers and the + * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any + * of the registers are changed. + * + * Bit 16: Larger tiles + * Bit 17: 4x2 tiles + * Bit 18: Extremely weird tile like, but some pixels duplicated? 
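+ *
+ * Editor's illustration (an assumption based on the defines below, not on
+ * documentation): a linear, unswapped ARGB8888 color buffer that is
+ * 1024 pixels wide would plausibly be programmed as
+ *   RB3D_COLORPITCH0 = 1024 | R300_COLOR_FORMAT_ARGB8888 |
+ *                      R300_COLOR_ENDIAN_NO_SWAP;
+ * with all tiling bits left at zero and the pitch given in pixels.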
+ */ +#define R300_RB3D_COLORPITCH0               0x4E38 +#       define R300_COLORPITCH_MASK              0x00003FFE +#       define R300_COLOR_TILE_DISABLE            (0 << 16) +#       define R300_COLOR_TILE_ENABLE             (1 << 16) +#       define R300_COLOR_MICROTILE_DISABLE       (0 << 17) +#       define R300_COLOR_MICROTILE_ENABLE        (1 << 17) +#       define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +#       define R300_COLOR_ENDIAN_NO_SWAP          (0 << 19) +#       define R300_COLOR_ENDIAN_WORD_SWAP        (1 << 19) +#       define R300_COLOR_ENDIAN_DWORD_SWAP       (2 << 19) +#       define R300_COLOR_ENDIAN_HALF_DWORD_SWAP  (3 << 19) +#	define R500_COLOR_FORMAT_ARGB10101010     (0 << 21) +#	define R500_COLOR_FORMAT_UV1010           (1 << 21) +#	define R500_COLOR_FORMAT_CI8              (2 << 21) /* 2D only */ +#	define R300_COLOR_FORMAT_ARGB1555         (3 << 21) +#       define R300_COLOR_FORMAT_RGB565           (4 << 21) +#       define R500_COLOR_FORMAT_ARGB2101010      (5 << 21) +#       define R300_COLOR_FORMAT_ARGB8888         (6 << 21) +#       define R300_COLOR_FORMAT_ARGB32323232     (7 << 21) +/* reserved */ +#       define R300_COLOR_FORMAT_I8               (9 << 21) +#       define R300_COLOR_FORMAT_ARGB16161616     (10 << 21) +#       define R300_COLOR_FORMAT_VYUY             (11 << 21) +#       define R300_COLOR_FORMAT_YVYU             (12 << 21) +#       define R300_COLOR_FORMAT_UV88             (13 << 21) +#       define R500_COLOR_FORMAT_I10              (14 << 21) +#       define R300_COLOR_FORMAT_ARGB4444         (15 << 21) +#define R300_RB3D_COLORPITCH1               0x4E3C +#define R300_RB3D_COLORPITCH2               0x4E40 +#define R300_RB3D_COLORPITCH3               0x4E44 + +/* gap */ + +/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then + * a flush or free will not occur upon a write to this register, but a sync + * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE + * are zero but DC_FINISH is one, then a sync will be sent immediately -- the + * cb will not wait for all the previous operations to complete before sending + * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to + * zero. + * + * Set to 0A before 3D operations, set to 02 afterwards. 
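+ *
+ * Decoded against the defines below (editor's note): 0x0A is
+ * R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) together with
+ * R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2), while 0x02 is
+ * the dirty-3D flush alone with DC_FREE left at "no effect".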
+ */ +#define R300_RB3D_DSTCACHE_CTLSTAT               0x4e4c +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT         (0 << 0) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1       (1 << 0) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D    (2 << 0) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1  (3 << 0) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT          (0 << 2) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1        (1 << 2) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS       (2 << 2) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1     (3 << 2) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL        (0 << 4) +#	define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL           (1 << 4) + +#define R300_RB3D_DITHER_CTL 0x4E50 +#	define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE         (0 << 0) +#	define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND            (1 << 0) +#	define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT              (2 << 0) +/* reserved */ +#	define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE   (0 << 2) +#	define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND      (1 << 2) +#	define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT        (2 << 2) +/* reserved */ + +/* Resolve buffer destination address. The cache must be empty before changing + * this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_OFFSET        0x4e80 +#	define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 +#	define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before + * changing this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_PITCH         0x4e84 +#	define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 +#	define R300_RB3D_AARESOLVE_PITCH_MASK  0x00003ffe /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Control. Unpipelined */ +#define R300_RB3D_AARESOLVE_CTL           0x4e88 +#	define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL   (0 << 0) +#	define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE  (1 << 0) +#	define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10      (0 << 1) +#	define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22      (1 << 1) +#	define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) +#	define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) + + +/* Discard src pixels less than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 +/* Discard src pixels greater than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 +#	define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 + +/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. 
*/ +#define R300_RB3D_ROPCNTL                             0x4e18 +#	define R300_RB3D_ROPCNTL_ROP_ENABLE            0x00000004 +#	define R300_RB3D_ROPCNTL_ROP_MASK              (15 << 8) +#	define R300_RB3D_ROPCNTL_ROP_SHIFT             8 + +/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_CLRCMP_FLIPE                        0x4e1c + +/* Sets the fifo sizes */ +#define R500_RB3D_FIFO_SIZE                           0x4ef4 +#	define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL   (0 << 0) +#	define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF   (1 << 0) +#	define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) +#	define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_AR                   0x4ef8 +#	define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK    0x0000ffff +#	define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT   0 +#	define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK  0xffff0000 +#	define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_GB                   0x4efc +#	define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK   0x0000ffff +#	define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT  0 +#	define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK  0xffff0000 +#	define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 + +/* gap */ +/* There seems to be no "write only" setting, so use Z-test = ALWAYS + * for this. + * Bit (1<<8) is the "test" bit. so plain write is 6  - vd + */ +#define R300_ZB_CNTL                             0x4F00 +#	define R300_STENCIL_ENABLE		 (1 << 0) +#	define R300_Z_ENABLE		         (1 << 1) +#	define R300_Z_WRITE_ENABLE		 (1 << 2) +#	define R300_Z_SIGNED_COMPARE		 (1 << 3) +#	define R300_STENCIL_FRONT_BACK		 (1 << 4) + +#define R300_ZB_ZSTENCILCNTL                   0x4f04 +	/* functions */ +#	define R300_ZS_NEVER			0 +#	define R300_ZS_LESS			1 +#	define R300_ZS_LEQUAL			2 +#	define R300_ZS_EQUAL			3 +#	define R300_ZS_GEQUAL			4 +#	define R300_ZS_GREATER			5 +#	define R300_ZS_NOTEQUAL			6 +#	define R300_ZS_ALWAYS			7 +#       define R300_ZS_MASK                     7 +	/* operations */ +#	define R300_ZS_KEEP			0 +#	define R300_ZS_ZERO			1 +#	define R300_ZS_REPLACE			2 +#	define R300_ZS_INCR			3 +#	define R300_ZS_DECR			4 +#	define R300_ZS_INVERT			5 +#	define R300_ZS_INCR_WRAP		6 +#	define R300_ZS_DECR_WRAP		7 +#	define R300_Z_FUNC_SHIFT		0 +	/* front and back refer to operations done for front +	   and back faces, i.e. 
separate stencil function support */ +#	define R300_S_FRONT_FUNC_SHIFT	        3 +#	define R300_S_FRONT_SFAIL_OP_SHIFT	6 +#	define R300_S_FRONT_ZPASS_OP_SHIFT	9 +#	define R300_S_FRONT_ZFAIL_OP_SHIFT      12 +#	define R300_S_BACK_FUNC_SHIFT           15 +#	define R300_S_BACK_SFAIL_OP_SHIFT       18 +#	define R300_S_BACK_ZPASS_OP_SHIFT       21 +#	define R300_S_BACK_ZFAIL_OP_SHIFT       24 + +#define R300_ZB_STENCILREFMASK                        0x4f08 +#	define R300_STENCILREF_SHIFT       0 +#	define R300_STENCILREF_MASK        0x000000ff +#	define R300_STENCILMASK_SHIFT      8 +#	define R300_STENCILMASK_MASK       0x0000ff00 +#	define R300_STENCILWRITEMASK_SHIFT 16 +#	define R300_STENCILWRITEMASK_MASK  0x00ff0000 + +/* gap */ + +#define R300_ZB_FORMAT                             0x4f10 +#	define R300_DEPTHFORMAT_16BIT_INT_Z   (0 << 0) +#	define R300_DEPTHFORMAT_16BIT_13E3    (1 << 0) +#	define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL   (2 << 0) +/* reserved up to (15 << 0) */ +#	define R300_INVERT_13E3_LEADING_ONES  (0 << 4) +#	define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) + +#define R300_ZB_ZTOP                             0x4F14 +#	define R300_ZTOP_DISABLE                 (0 << 0) +#	define R300_ZTOP_ENABLE                  (1 << 0) + +/* gap */ + +#define R300_ZB_ZCACHE_CTLSTAT            0x4f18 +#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT      (0 << 0) +#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT       (0 << 1) +#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE            (1 << 1) +#       define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE            (0 << 31) +#       define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY            (1 << 31) + +#define R300_ZB_BW_CNTL                     0x4f1c +#	define R300_HIZ_DISABLE                              (0 << 0) +#	define R300_HIZ_ENABLE                               (1 << 0) +#	define R300_HIZ_MIN                                  (0 << 1) +#	define R300_HIZ_MAX                                  (1 << 1) +#	define R300_FAST_FILL_DISABLE                        (0 << 2) +#	define R300_FAST_FILL_ENABLE                         (1 << 2) +#	define R300_RD_COMP_DISABLE                          (0 << 3) +#	define R300_RD_COMP_ENABLE                           (1 << 3) +#	define R300_WR_COMP_DISABLE                          (0 << 4) +#	define R300_WR_COMP_ENABLE                           (1 << 4) +#	define R300_ZB_CB_CLEAR_RMW                          (0 << 5) +#	define R300_ZB_CB_CLEAR_CACHE_LINEAR                 (1 << 5) +#	define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE   (0 << 6) +#	define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE    (1 << 6) + +#	define R500_ZEQUAL_OPTIMIZE_ENABLE                   (0 << 7) +#	define R500_ZEQUAL_OPTIMIZE_DISABLE                  (1 << 7) +#	define R500_SEQUAL_OPTIMIZE_ENABLE                   (0 << 8) +#	define R500_SEQUAL_OPTIMIZE_DISABLE                  (1 << 8) + +#	define R500_BMASK_ENABLE                             (0 << 10) +#	define R500_BMASK_DISABLE                            (1 << 10) +#	define R500_HIZ_EQUAL_REJECT_DISABLE                 (0 << 11) +#	define R500_HIZ_EQUAL_REJECT_ENABLE                  (1 << 11) +#	define R500_HIZ_FP_EXP_BITS_DISABLE                  (0 << 12) +#	define R500_HIZ_FP_EXP_BITS_1                        (1 << 12) +#	define R500_HIZ_FP_EXP_BITS_2                        (2 << 12) +#	define R500_HIZ_FP_EXP_BITS_3                        (3 << 12) +#	define R500_HIZ_FP_EXP_BITS_4           
             (4 << 12) +#	define R500_HIZ_FP_EXP_BITS_5                        (5 << 12) +#	define R500_HIZ_FP_INVERT_LEADING_ONES               (0 << 15) +#	define R500_HIZ_FP_INVERT_LEADING_ZEROS              (1 << 15) +#	define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE      (0 << 16) +#	define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE     (1 << 16) +#	define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE           (0 << 17) +#	define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE          (1 << 17) +#	define R500_PEQ_PACKING_DISABLE                      (0 << 18) +#	define R500_PEQ_PACKING_ENABLE                       (1 << 18) +#	define R500_COVERED_PTR_MASKING_DISABLE              (0 << 18) +#	define R500_COVERED_PTR_MASKING_ENABLE               (1 << 18) + + +/* gap */ + +/* Z Buffer Address Offset. + * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. + */ +#define R300_ZB_DEPTHOFFSET               0x4f20 + +/* Z Buffer Pitch and Endian Control */ +#define R300_ZB_DEPTHPITCH                0x4f24 +#       define R300_DEPTHPITCH_MASK              0x00003FFC +#       define R300_DEPTHMACROTILE_DISABLE      (0 << 16) +#       define R300_DEPTHMACROTILE_ENABLE       (1 << 16) +#       define R300_DEPTHMICROTILE_LINEAR       (0 << 17) +#       define R300_DEPTHMICROTILE_TILED        (1 << 17) +#       define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +#       define R300_DEPTHENDIAN_NO_SWAP         (0 << 18) +#       define R300_DEPTHENDIAN_WORD_SWAP       (1 << 18) +#       define R300_DEPTHENDIAN_DWORD_SWAP      (2 << 18) +#       define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) + +/* Z Buffer Clear Value */ +#define R300_ZB_DEPTHCLEARVALUE                  0x4f28 + +/* Hierarchical Z Memory Offset */ +#define R300_ZB_HIZ_OFFSET                       0x4f44 + +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX                      0x4f48 + +/* Hierarchical Z Data */ +#define R300_ZB_HIZ_DWORD                        0x4f4c + +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX                      0x4f50 + +/* Hierarchical Z Pitch */ +#define R300_ZB_HIZ_PITCH                        0x4f54 + +/* Z Buffer Z Pass Counter Data */ +#define R300_ZB_ZPASS_DATA                       0x4f58 + +/* Z Buffer Z Pass Counter Address */ +#define R300_ZB_ZPASS_ADDR                       0x4f5c + +/* Depth buffer X and Y coordinate offset */ +#define R300_ZB_DEPTHXY_OFFSET                   0x4f60 +#	define R300_DEPTHX_OFFSET_SHIFT  1 +#	define R300_DEPTHX_OFFSET_MASK   0x000007FE +#	define R300_DEPTHY_OFFSET_SHIFT  17 +#	define R300_DEPTHY_OFFSET_MASK   0x07FE0000 + +/* Sets the fifo sizes */ +#define R500_ZB_FIFO_SIZE                        0x4fd0 +#	define R500_OP_FIFO_SIZE_FULL   (0 << 0) +#	define R500_OP_FIFO_SIZE_HALF   (1 << 0) +#	define R500_OP_FIFO_SIZE_QUATER (2 << 0) +#	define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) + +/* Stencil Reference Value and Mask for backfacing quads */ +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF                0x4fd4 +#	define R500_STENCILREF_SHIFT       0 +#	define R500_STENCILREF_MASK        0x000000ff +#	define R500_STENCILMASK_SHIFT      8 +#	define R500_STENCILMASK_MASK       0x0000ff00 +#	define R500_STENCILWRITEMASK_SHIFT 16 +#	define R500_STENCILWRITEMASK_MASK  0x00ff0000 + +/** + * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION + * + * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector + * Engine instruction or a 
Math Engine instruction. + */ + +/*\{*/ + +enum { +	/* R3XX */ +	VECTOR_NO_OP			= 0, +	VE_DOT_PRODUCT			= 1, +	VE_MULTIPLY			= 2, +	VE_ADD				= 3, +	VE_MULTIPLY_ADD			= 4, +	VE_DISTANCE_VECTOR		= 5, +	VE_FRACTION			= 6, +	VE_MAXIMUM			= 7, +	VE_MINIMUM			= 8, +	VE_SET_GREATER_THAN_EQUAL	= 9, +	VE_SET_LESS_THAN		= 10, +	VE_MULTIPLYX2_ADD		= 11, +	VE_MULTIPLY_CLAMP		= 12, +	VE_FLT2FIX_DX			= 13, +	VE_FLT2FIX_DX_RND		= 14, +	/* R5XX */ +	VE_PRED_SET_EQ_PUSH		= 15, +	VE_PRED_SET_GT_PUSH		= 16, +	VE_PRED_SET_GTE_PUSH		= 17, +	VE_PRED_SET_NEQ_PUSH		= 18, +	VE_COND_WRITE_EQ		= 19, +	VE_COND_WRITE_GT		= 20, +	VE_COND_WRITE_GTE		= 21, +	VE_COND_WRITE_NEQ		= 22, +	VE_COND_MUX_EQ			= 23, +	VE_COND_MUX_GT			= 24, +	VE_COND_MUX_GTE			= 25, +	VE_SET_GREATER_THAN		= 26, +	VE_SET_EQUAL			= 27, +	VE_SET_NOT_EQUAL		= 28, +}; + +enum { +	/* R3XX */ +	MATH_NO_OP			= 0, +	ME_EXP_BASE2_DX			= 1, +	ME_LOG_BASE2_DX			= 2, +	ME_EXP_BASEE_FF			= 3, +	ME_LIGHT_COEFF_DX		= 4, +	ME_POWER_FUNC_FF		= 5, +	ME_RECIP_DX			= 6, +	ME_RECIP_FF			= 7, +	ME_RECIP_SQRT_DX		= 8, +	ME_RECIP_SQRT_FF		= 9, +	ME_MULTIPLY			= 10, +	ME_EXP_BASE2_FULL_DX		= 11, +	ME_LOG_BASE2_FULL_DX		= 12, +	ME_POWER_FUNC_FF_CLAMP_B	= 13, +	ME_POWER_FUNC_FF_CLAMP_B1	= 14, +	ME_POWER_FUNC_FF_CLAMP_01	= 15, +	ME_SIN				= 16, +	ME_COS				= 17, +	/* R5XX */ +	ME_LOG_BASE2_IEEE		= 18, +	ME_RECIP_IEEE			= 19, +	ME_RECIP_SQRT_IEEE		= 20, +	ME_PRED_SET_EQ			= 21, +	ME_PRED_SET_GT			= 22, +	ME_PRED_SET_GTE			= 23, +	ME_PRED_SET_NEQ			= 24, +	ME_PRED_SET_CLR			= 25, +	ME_PRED_SET_INV			= 26, +	ME_PRED_SET_POP			= 27, +	ME_PRED_SET_RESTORE		= 28, +}; + +enum { +	/* R3XX */ +	PVS_MACRO_OP_2CLK_MADD		= 0, +	PVS_MACRO_OP_2CLK_M2X_ADD	= 1, +}; + +enum { +	PVS_SRC_REG_TEMPORARY		= 0,	/* Intermediate Storage */ +	PVS_SRC_REG_INPUT		= 1,	/* Input Vertex Storage */ +	PVS_SRC_REG_CONSTANT		= 2,	/* Constant State Storage */ +	PVS_SRC_REG_ALT_TEMPORARY	= 3,	/* Alternate Intermediate Storage */ +}; + +enum { +	PVS_DST_REG_TEMPORARY		= 0,	/* Intermediate Storage */ +	PVS_DST_REG_A0			= 1,	/* Address Register Storage */ +	PVS_DST_REG_OUT			= 2,	/* Output Memory. 
Used for all outputs */ +	PVS_DST_REG_OUT_REPL_X		= 3,	/* Output Memory & Replicate X to all channels */ +	PVS_DST_REG_ALT_TEMPORARY	= 4,	/* Alternate Intermediate Storage */ +	PVS_DST_REG_INPUT		= 5,	/* Output Memory & Replicate X to all channels */ +}; + +enum { +	PVS_SRC_SELECT_X		= 0,	/* Select X Component */ +	PVS_SRC_SELECT_Y		= 1,	/* Select Y Component */ +	PVS_SRC_SELECT_Z		= 2,	/* Select Z Component */ +	PVS_SRC_SELECT_W		= 3,	/* Select W Component */ +	PVS_SRC_SELECT_FORCE_0		= 4,	/* Force Component to 0.0 */ +	PVS_SRC_SELECT_FORCE_1		= 5,	/* Force Component to 1.0 */ +}; + +/* PVS Opcode & Destination Operand Description */ + +enum { +	PVS_DST_OPCODE_MASK		= 0x3f, +	PVS_DST_OPCODE_SHIFT		= 0, +	PVS_DST_MATH_INST_MASK		= 0x1, +	PVS_DST_MATH_INST_SHIFT		= 6, +	PVS_DST_MACRO_INST_MASK		= 0x1, +	PVS_DST_MACRO_INST_SHIFT	= 7, +	PVS_DST_REG_TYPE_MASK		= 0xf, +	PVS_DST_REG_TYPE_SHIFT		= 8, +	PVS_DST_ADDR_MODE_1_MASK	= 0x1, +	PVS_DST_ADDR_MODE_1_SHIFT	= 12, +	PVS_DST_OFFSET_MASK		= 0x7f, +	PVS_DST_OFFSET_SHIFT		= 13, +	PVS_DST_WE_X_MASK		= 0x1, +	PVS_DST_WE_X_SHIFT		= 20, +	PVS_DST_WE_Y_MASK		= 0x1, +	PVS_DST_WE_Y_SHIFT		= 21, +	PVS_DST_WE_Z_MASK		= 0x1, +	PVS_DST_WE_Z_SHIFT		= 22, +	PVS_DST_WE_W_MASK		= 0x1, +	PVS_DST_WE_W_SHIFT		= 23, +	PVS_DST_VE_SAT_MASK		= 0x1, +	PVS_DST_VE_SAT_SHIFT		= 24, +	PVS_DST_ME_SAT_MASK		= 0x1, +	PVS_DST_ME_SAT_SHIFT		= 25, +	PVS_DST_PRED_ENABLE_MASK	= 0x1, +	PVS_DST_PRED_ENABLE_SHIFT	= 26, +	PVS_DST_PRED_SENSE_MASK		= 0x1, +	PVS_DST_PRED_SENSE_SHIFT	= 27, +	PVS_DST_DUAL_MATH_OP_MASK	= 0x3, +	PVS_DST_DUAL_MATH_OP_SHIFT	= 27, +	PVS_DST_ADDR_SEL_MASK		= 0x3, +	PVS_DST_ADDR_SEL_SHIFT		= 29, +	PVS_DST_ADDR_MODE_0_MASK	= 0x1, +	PVS_DST_ADDR_MODE_0_SHIFT	= 31, +}; + +/* PVS Source Operand Description */ + +enum { +	PVS_SRC_REG_TYPE_MASK		= 0x3, +	PVS_SRC_REG_TYPE_SHIFT		= 0, +	SPARE_0_MASK			= 0x1, +	SPARE_0_SHIFT			= 2, +	PVS_SRC_ABS_XYZW_MASK		= 0x1, +	PVS_SRC_ABS_XYZW_SHIFT		= 3, +	PVS_SRC_ADDR_MODE_0_MASK	= 0x1, +	PVS_SRC_ADDR_MODE_0_SHIFT	= 4, +	PVS_SRC_OFFSET_MASK		= 0xff, +	PVS_SRC_OFFSET_SHIFT		= 5, +	PVS_SRC_SWIZZLE_X_MASK		= 0x7, +	PVS_SRC_SWIZZLE_X_SHIFT		= 13, +	PVS_SRC_SWIZZLE_Y_MASK		= 0x7, +	PVS_SRC_SWIZZLE_Y_SHIFT		= 16, +	PVS_SRC_SWIZZLE_Z_MASK		= 0x7, +	PVS_SRC_SWIZZLE_Z_SHIFT		= 19, +	PVS_SRC_SWIZZLE_W_MASK		= 0x7, +	PVS_SRC_SWIZZLE_W_SHIFT		= 22, +	PVS_SRC_MODIFIER_X_MASK		= 0x1, +	PVS_SRC_MODIFIER_X_SHIFT	= 25, +	PVS_SRC_MODIFIER_Y_MASK		= 0x1, +	PVS_SRC_MODIFIER_Y_SHIFT	= 26, +	PVS_SRC_MODIFIER_Z_MASK		= 0x1, +	PVS_SRC_MODIFIER_Z_SHIFT	= 27, +	PVS_SRC_MODIFIER_W_MASK		= 0x1, +	PVS_SRC_MODIFIER_W_SHIFT	= 28, +	PVS_SRC_ADDR_SEL_MASK		= 0x3, +	PVS_SRC_ADDR_SEL_SHIFT		= 29, +	PVS_SRC_ADDR_MODE_1_MASK	= 0x0, +	PVS_SRC_ADDR_MODE_1_SHIFT	= 32, +}; + +/*\}*/ + +/* BEGIN: Packet 3 commands */ + +/* A primitive emission dword. 
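+ * It is assembled by OR-ing together the R300_PRIM_* values defined below.
+ * As a sketch (not taken from this header), an indexed triangle-list draw of
+ * num_verts vertices could be described as:
+ *
+ *   uint32_t vap_vf_cntl = R300_PRIM_TYPE_TRI_LIST |
+ *                          R300_PRIM_WALK_IND |
+ *                          (num_verts << R300_PRIM_NUM_VERTICES_SHIFT);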
*/ +#define R300_PRIM_TYPE_NONE                     (0 << 0) +#define R300_PRIM_TYPE_POINT                    (1 << 0) +#define R300_PRIM_TYPE_LINE                     (2 << 0) +#define R300_PRIM_TYPE_LINE_STRIP               (3 << 0) +#define R300_PRIM_TYPE_TRI_LIST                 (4 << 0) +#define R300_PRIM_TYPE_TRI_FAN                  (5 << 0) +#define R300_PRIM_TYPE_TRI_STRIP                (6 << 0) +#define R300_PRIM_TYPE_TRI_TYPE2                (7 << 0) +#define R300_PRIM_TYPE_RECT_LIST                (8 << 0) +#define R300_PRIM_TYPE_3VRT_POINT_LIST          (9 << 0) +#define R300_PRIM_TYPE_3VRT_LINE_LIST           (10 << 0) +	/* GUESS (based on r200) */ +#define R300_PRIM_TYPE_POINT_SPRITES            (11 << 0) +#define R300_PRIM_TYPE_LINE_LOOP                (12 << 0) +#define R300_PRIM_TYPE_QUADS                    (13 << 0) +#define R300_PRIM_TYPE_QUAD_STRIP               (14 << 0) +#define R300_PRIM_TYPE_POLYGON                  (15 << 0) +#define R300_PRIM_TYPE_MASK                     0xF +#define R300_PRIM_WALK_IND                      (1 << 4) +#define R300_PRIM_WALK_LIST                     (2 << 4) +#define R300_PRIM_WALK_RING                     (3 << 4) +#define R300_PRIM_WALK_MASK                     (3 << 4) +	/* GUESS (based on r200) */ +#define R300_PRIM_COLOR_ORDER_BGRA              (0 << 6) +#define R300_PRIM_COLOR_ORDER_RGBA              (1 << 6) +#define R300_PRIM_NUM_VERTICES_SHIFT            16 +#define R300_PRIM_NUM_VERTICES_MASK             0xffff + + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader.  You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. 
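+ *
+ * As a sketch of the resulting upload sequence (illustrative only: OUT_REG()
+ * is a hypothetical register-write helper, first_slot the starting
+ * instruction slot, and code[] an array of dwords already packed from the
+ * R500_US_* fields defined below):
+ *
+ *   OUT_REG(R500_US_VECTOR_INDEX, first_slot);
+ *   for (i = 0; i < num_dwords; i++)
+ *       OUT_REG(R500_GA_US_VECTOR_DATA, code[i]);   /* index autoincrements */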
+ */ +#define R500_US_ALU_ALPHA_INST_0			0xa800 +#   define R500_ALPHA_OP_MAD				0 +#   define R500_ALPHA_OP_DP				1 +#   define R500_ALPHA_OP_MIN				2 +#   define R500_ALPHA_OP_MAX				3 +/* #define R500_ALPHA_OP_RESERVED			4 */ +#   define R500_ALPHA_OP_CND				5 +#   define R500_ALPHA_OP_CMP				6 +#   define R500_ALPHA_OP_FRC				7 +#   define R500_ALPHA_OP_EX2				8 +#   define R500_ALPHA_OP_LN2				9 +#   define R500_ALPHA_OP_RCP				10 +#   define R500_ALPHA_OP_RSQ				11 +#   define R500_ALPHA_OP_SIN				12 +#   define R500_ALPHA_OP_COS				13 +#   define R500_ALPHA_OP_MDH				14 +#   define R500_ALPHA_OP_MDV				15 +#   define R500_ALPHA_ADDRD(x)				(x << 4) +#   define R500_ALPHA_ADDRD_REL				(1 << 11) +#  define R500_ALPHA_SEL_A_SHIFT			12 +#   define R500_ALPHA_SEL_A_SRC0			(0 << 12) +#   define R500_ALPHA_SEL_A_SRC1			(1 << 12) +#   define R500_ALPHA_SEL_A_SRC2			(2 << 12) +#   define R500_ALPHA_SEL_A_SRCP			(3 << 12) +#   define R500_ALPHA_SWIZ_A_R				(0 << 14) +#   define R500_ALPHA_SWIZ_A_G				(1 << 14) +#   define R500_ALPHA_SWIZ_A_B				(2 << 14) +#   define R500_ALPHA_SWIZ_A_A				(3 << 14) +#   define R500_ALPHA_SWIZ_A_0				(4 << 14) +#   define R500_ALPHA_SWIZ_A_HALF			(5 << 14) +#   define R500_ALPHA_SWIZ_A_1				(6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED			(7 << 14) */ +#   define R500_ALPHA_MOD_A_NOP				(0 << 17) +#   define R500_ALPHA_MOD_A_NEG				(1 << 17) +#   define R500_ALPHA_MOD_A_ABS				(2 << 17) +#   define R500_ALPHA_MOD_A_NAB				(3 << 17) +#  define R500_ALPHA_SEL_B_SHIFT			19 +#   define R500_ALPHA_SEL_B_SRC0			(0 << 19) +#   define R500_ALPHA_SEL_B_SRC1			(1 << 19) +#   define R500_ALPHA_SEL_B_SRC2			(2 << 19) +#   define R500_ALPHA_SEL_B_SRCP			(3 << 19) +#   define R500_ALPHA_SWIZ_B_R				(0 << 21) +#   define R500_ALPHA_SWIZ_B_G				(1 << 21) +#   define R500_ALPHA_SWIZ_B_B				(2 << 21) +#   define R500_ALPHA_SWIZ_B_A				(3 << 21) +#   define R500_ALPHA_SWIZ_B_0				(4 << 21) +#   define R500_ALPHA_SWIZ_B_HALF			(5 << 21) +#   define R500_ALPHA_SWIZ_B_1				(6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED			(7 << 21) */ +#   define R500_ALPHA_MOD_B_NOP				(0 << 24) +#   define R500_ALPHA_MOD_B_NEG				(1 << 24) +#   define R500_ALPHA_MOD_B_ABS				(2 << 24) +#   define R500_ALPHA_MOD_B_NAB				(3 << 24) +#   define R500_ALPHA_OMOD_IDENTITY			(0 << 26) +#   define R500_ALPHA_OMOD_MUL_2			(1 << 26) +#   define R500_ALPHA_OMOD_MUL_4			(2 << 26) +#   define R500_ALPHA_OMOD_MUL_8			(3 << 26) +#   define R500_ALPHA_OMOD_DIV_2			(4 << 26) +#   define R500_ALPHA_OMOD_DIV_4			(5 << 26) +#   define R500_ALPHA_OMOD_DIV_8			(6 << 26) +#   define R500_ALPHA_OMOD_DISABLE			(7 << 26) +#   define R500_ALPHA_TARGET(x)				(x << 29) +#   define R500_ALPHA_W_OMASK				(1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0			0x9800 +#   define R500_ALPHA_ADDR0(x)				(x << 0) +#   define R500_ALPHA_ADDR0_CONST			(1 << 8) +#   define R500_ALPHA_ADDR0_REL				(1 << 9) +#   define R500_ALPHA_ADDR1(x)				(x << 10) +#   define R500_ALPHA_ADDR1_CONST			(1 << 18) +#   define R500_ALPHA_ADDR1_REL				(1 << 19) +#   define R500_ALPHA_ADDR2(x)				(x << 20) +#   define R500_ALPHA_ADDR2_CONST			(1 << 28) +#   define R500_ALPHA_ADDR2_REL				(1 << 29) +#   define R500_ALPHA_SRCP_OP_1_MINUS_2A0		(0 << 30) +#   define R500_ALPHA_SRCP_OP_A1_MINUS_A0		(1 << 30) +#   define R500_ALPHA_SRCP_OP_A1_PLUS_A0		(2 << 30) +#   define R500_ALPHA_SRCP_OP_1_MINUS_A0		(3 << 30) +#define R500_US_ALU_RGBA_INST_0				0xb000 +#   define R500_ALU_RGBA_OP_MAD				(0 << 0) +#   define R500_ALU_RGBA_OP_DP3				(1 << 0) +#   define 
R500_ALU_RGBA_OP_DP4				(2 << 0) +#   define R500_ALU_RGBA_OP_D2A				(3 << 0) +#   define R500_ALU_RGBA_OP_MIN				(4 << 0) +#   define R500_ALU_RGBA_OP_MAX				(5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED			(6 << 0) */ +#   define R500_ALU_RGBA_OP_CND				(7 << 0) +#   define R500_ALU_RGBA_OP_CMP				(8 << 0) +#   define R500_ALU_RGBA_OP_FRC				(9 << 0) +#   define R500_ALU_RGBA_OP_SOP				(10 << 0) +#   define R500_ALU_RGBA_OP_MDH				(11 << 0) +#   define R500_ALU_RGBA_OP_MDV				(12 << 0) +#   define R500_ALU_RGBA_ADDRD(x)			(x << 4) +#   define R500_ALU_RGBA_ADDRD_REL			(1 << 11) +#  define R500_ALU_RGBA_SEL_C_SHIFT			12 +#   define R500_ALU_RGBA_SEL_C_SRC0			(0 << 12) +#   define R500_ALU_RGBA_SEL_C_SRC1			(1 << 12) +#   define R500_ALU_RGBA_SEL_C_SRC2			(2 << 12) +#   define R500_ALU_RGBA_SEL_C_SRCP			(3 << 12) +#   define R500_ALU_RGBA_R_SWIZ_R			(0 << 14) +#   define R500_ALU_RGBA_R_SWIZ_G			(1 << 14) +#   define R500_ALU_RGBA_R_SWIZ_B			(2 << 14) +#   define R500_ALU_RGBA_R_SWIZ_A			(3 << 14) +#   define R500_ALU_RGBA_R_SWIZ_0			(4 << 14) +#   define R500_ALU_RGBA_R_SWIZ_HALF			(5 << 14) +#   define R500_ALU_RGBA_R_SWIZ_1			(6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED			(7 << 14) */ +#   define R500_ALU_RGBA_G_SWIZ_R			(0 << 17) +#   define R500_ALU_RGBA_G_SWIZ_G			(1 << 17) +#   define R500_ALU_RGBA_G_SWIZ_B			(2 << 17) +#   define R500_ALU_RGBA_G_SWIZ_A			(3 << 17) +#   define R500_ALU_RGBA_G_SWIZ_0			(4 << 17) +#   define R500_ALU_RGBA_G_SWIZ_HALF			(5 << 17) +#   define R500_ALU_RGBA_G_SWIZ_1			(6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED			(7 << 17) */ +#   define R500_ALU_RGBA_B_SWIZ_R			(0 << 20) +#   define R500_ALU_RGBA_B_SWIZ_G			(1 << 20) +#   define R500_ALU_RGBA_B_SWIZ_B			(2 << 20) +#   define R500_ALU_RGBA_B_SWIZ_A			(3 << 20) +#   define R500_ALU_RGBA_B_SWIZ_0			(4 << 20) +#   define R500_ALU_RGBA_B_SWIZ_HALF			(5 << 20) +#   define R500_ALU_RGBA_B_SWIZ_1			(6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED			(7 << 20) */ +#   define R500_ALU_RGBA_MOD_C_NOP			(0 << 23) +#   define R500_ALU_RGBA_MOD_C_NEG			(1 << 23) +#   define R500_ALU_RGBA_MOD_C_ABS			(2 << 23) +#   define R500_ALU_RGBA_MOD_C_NAB			(3 << 23) +#  define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT		25 +#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC0		(0 << 25) +#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC1		(1 << 25) +#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC2		(2 << 25) +#   define R500_ALU_RGBA_ALPHA_SEL_C_SRCP		(3 << 25) +#   define R500_ALU_RGBA_A_SWIZ_R			(0 << 27) +#   define R500_ALU_RGBA_A_SWIZ_G			(1 << 27) +#   define R500_ALU_RGBA_A_SWIZ_B			(2 << 27) +#   define R500_ALU_RGBA_A_SWIZ_A			(3 << 27) +#   define R500_ALU_RGBA_A_SWIZ_0			(4 << 27) +#   define R500_ALU_RGBA_A_SWIZ_HALF			(5 << 27) +#   define R500_ALU_RGBA_A_SWIZ_1			(6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED			(7 << 27) */ +#   define R500_ALU_RGBA_ALPHA_MOD_C_NOP		(0 << 30) +#   define R500_ALU_RGBA_ALPHA_MOD_C_NEG		(1 << 30) +#   define R500_ALU_RGBA_ALPHA_MOD_C_ABS		(2 << 30) +#   define R500_ALU_RGBA_ALPHA_MOD_C_NAB		(3 << 30) +#define R500_US_ALU_RGB_INST_0				0xa000 +#  define R500_ALU_RGB_SEL_A_SHIFT			0 +#   define R500_ALU_RGB_SEL_A_SRC0			(0 << 0) +#   define R500_ALU_RGB_SEL_A_SRC1			(1 << 0) +#   define R500_ALU_RGB_SEL_A_SRC2			(2 << 0) +#   define R500_ALU_RGB_SEL_A_SRCP			(3 << 0) +#   define R500_ALU_RGB_R_SWIZ_A_R			(0 << 2) +#   define R500_ALU_RGB_R_SWIZ_A_G			(1 << 2) +#   define R500_ALU_RGB_R_SWIZ_A_B			(2 << 2) +#   define R500_ALU_RGB_R_SWIZ_A_A			(3 << 2) +#   define R500_ALU_RGB_R_SWIZ_A_0			(4 << 2) +#   
define R500_ALU_RGB_R_SWIZ_A_HALF			(5 << 2) +#   define R500_ALU_RGB_R_SWIZ_A_1			(6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED			(7 << 2) */ +#   define R500_ALU_RGB_G_SWIZ_A_R			(0 << 5) +#   define R500_ALU_RGB_G_SWIZ_A_G			(1 << 5) +#   define R500_ALU_RGB_G_SWIZ_A_B			(2 << 5) +#   define R500_ALU_RGB_G_SWIZ_A_A			(3 << 5) +#   define R500_ALU_RGB_G_SWIZ_A_0			(4 << 5) +#   define R500_ALU_RGB_G_SWIZ_A_HALF			(5 << 5) +#   define R500_ALU_RGB_G_SWIZ_A_1			(6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED			(7 << 5) */ +#   define R500_ALU_RGB_B_SWIZ_A_R			(0 << 8) +#   define R500_ALU_RGB_B_SWIZ_A_G			(1 << 8) +#   define R500_ALU_RGB_B_SWIZ_A_B			(2 << 8) +#   define R500_ALU_RGB_B_SWIZ_A_A			(3 << 8) +#   define R500_ALU_RGB_B_SWIZ_A_0			(4 << 8) +#   define R500_ALU_RGB_B_SWIZ_A_HALF			(5 << 8) +#   define R500_ALU_RGB_B_SWIZ_A_1			(6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED			(7 << 8) */ +#   define R500_ALU_RGB_MOD_A_NOP			(0 << 11) +#   define R500_ALU_RGB_MOD_A_NEG			(1 << 11) +#   define R500_ALU_RGB_MOD_A_ABS			(2 << 11) +#   define R500_ALU_RGB_MOD_A_NAB			(3 << 11) +#  define R500_ALU_RGB_SEL_B_SHIFT			13 +#   define R500_ALU_RGB_SEL_B_SRC0			(0 << 13) +#   define R500_ALU_RGB_SEL_B_SRC1			(1 << 13) +#   define R500_ALU_RGB_SEL_B_SRC2			(2 << 13) +#   define R500_ALU_RGB_SEL_B_SRCP			(3 << 13) +#   define R500_ALU_RGB_R_SWIZ_B_R			(0 << 15) +#   define R500_ALU_RGB_R_SWIZ_B_G			(1 << 15) +#   define R500_ALU_RGB_R_SWIZ_B_B			(2 << 15) +#   define R500_ALU_RGB_R_SWIZ_B_A			(3 << 15) +#   define R500_ALU_RGB_R_SWIZ_B_0			(4 << 15) +#   define R500_ALU_RGB_R_SWIZ_B_HALF			(5 << 15) +#   define R500_ALU_RGB_R_SWIZ_B_1			(6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED			(7 << 15) */ +#   define R500_ALU_RGB_G_SWIZ_B_R			(0 << 18) +#   define R500_ALU_RGB_G_SWIZ_B_G			(1 << 18) +#   define R500_ALU_RGB_G_SWIZ_B_B			(2 << 18) +#   define R500_ALU_RGB_G_SWIZ_B_A			(3 << 18) +#   define R500_ALU_RGB_G_SWIZ_B_0			(4 << 18) +#   define R500_ALU_RGB_G_SWIZ_B_HALF			(5 << 18) +#   define R500_ALU_RGB_G_SWIZ_B_1			(6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED			(7 << 18) */ +#   define R500_ALU_RGB_B_SWIZ_B_R			(0 << 21) +#   define R500_ALU_RGB_B_SWIZ_B_G			(1 << 21) +#   define R500_ALU_RGB_B_SWIZ_B_B			(2 << 21) +#   define R500_ALU_RGB_B_SWIZ_B_A			(3 << 21) +#   define R500_ALU_RGB_B_SWIZ_B_0			(4 << 21) +#   define R500_ALU_RGB_B_SWIZ_B_HALF			(5 << 21) +#   define R500_ALU_RGB_B_SWIZ_B_1			(6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED			(7 << 21) */ +#   define R500_ALU_RGB_MOD_B_NOP			(0 << 24) +#   define R500_ALU_RGB_MOD_B_NEG			(1 << 24) +#   define R500_ALU_RGB_MOD_B_ABS			(2 << 24) +#   define R500_ALU_RGB_MOD_B_NAB			(3 << 24) +#   define R500_ALU_RGB_OMOD_IDENTITY			(0 << 26) +#   define R500_ALU_RGB_OMOD_MUL_2			(1 << 26) +#   define R500_ALU_RGB_OMOD_MUL_4			(2 << 26) +#   define R500_ALU_RGB_OMOD_MUL_8			(3 << 26) +#   define R500_ALU_RGB_OMOD_DIV_2			(4 << 26) +#   define R500_ALU_RGB_OMOD_DIV_4			(5 << 26) +#   define R500_ALU_RGB_OMOD_DIV_8			(6 << 26) +#   define R500_ALU_RGB_OMOD_DISABLE			(7 << 26) +#   define R500_ALU_RGB_TARGET(x)			(x << 29) +#   define R500_ALU_RGB_WMASK				(1 << 31) +#define R500_US_ALU_RGB_ADDR_0				0x9000 +#   define R500_RGB_ADDR0(x)				(x << 0) +#   define R500_RGB_ADDR0_CONST				(1 << 8) +#   define R500_RGB_ADDR0_REL				(1 << 9) +#   define R500_RGB_ADDR1(x)				(x << 10) +#   define R500_RGB_ADDR1_CONST				(1 << 18) +#   define R500_RGB_ADDR1_REL				(1 << 19) +#   define R500_RGB_ADDR2(x)				(x << 20) +#   define 
R500_RGB_ADDR2_CONST				(1 << 28) +#   define R500_RGB_ADDR2_REL				(1 << 29) +#   define R500_RGB_SRCP_OP_1_MINUS_2RGB0		(0 << 30) +#   define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0		(1 << 30) +#   define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0		(2 << 30) +#   define R500_RGB_SRCP_OP_1_MINUS_RGB0		(3 << 30) +#define R500_US_CMN_INST_0				0xb800 +#  define R500_INST_TYPE_MASK				(3 << 0) +#   define R500_INST_TYPE_ALU				(0 << 0) +#   define R500_INST_TYPE_OUT				(1 << 0) +#   define R500_INST_TYPE_FC				(2 << 0) +#   define R500_INST_TYPE_TEX				(3 << 0) +#   define R500_INST_TEX_SEM_WAIT			(1 << 2) +#   define R500_INST_RGB_PRED_SEL_NONE			(0 << 3) +#   define R500_INST_RGB_PRED_SEL_RGBA			(1 << 3) +#   define R500_INST_RGB_PRED_SEL_RRRR			(2 << 3) +#   define R500_INST_RGB_PRED_SEL_GGGG			(3 << 3) +#   define R500_INST_RGB_PRED_SEL_BBBB			(4 << 3) +#   define R500_INST_RGB_PRED_SEL_AAAA			(5 << 3) +#   define R500_INST_RGB_PRED_INV			(1 << 6) +#   define R500_INST_WRITE_INACTIVE			(1 << 7) +#   define R500_INST_LAST				(1 << 8) +#   define R500_INST_NOP				(1 << 9) +#   define R500_INST_ALU_WAIT				(1 << 10) +#   define R500_INST_RGB_WMASK_R			(1 << 11) +#   define R500_INST_RGB_WMASK_G			(1 << 12) +#   define R500_INST_RGB_WMASK_B			(1 << 13) +#   define R500_INST_ALPHA_WMASK			(1 << 14) +#   define R500_INST_RGB_OMASK_R			(1 << 15) +#   define R500_INST_RGB_OMASK_G			(1 << 16) +#   define R500_INST_RGB_OMASK_B			(1 << 17) +#   define R500_INST_ALPHA_OMASK			(1 << 18) +#   define R500_INST_RGB_CLAMP				(1 << 19) +#   define R500_INST_ALPHA_CLAMP			(1 << 20) +#   define R500_INST_ALU_RESULT_SEL			(1 << 21) +#   define R500_INST_ALPHA_PRED_INV			(1 << 22) +#   define R500_INST_ALU_RESULT_OP_EQ			(0 << 23) +#   define R500_INST_ALU_RESULT_OP_LT			(1 << 23) +#   define R500_INST_ALU_RESULT_OP_GE			(2 << 23) +#   define R500_INST_ALU_RESULT_OP_NE			(3 << 23) +#   define R500_INST_ALPHA_PRED_SEL_NONE		(0 << 25) +#   define R500_INST_ALPHA_PRED_SEL_RGBA		(1 << 25) +#   define R500_INST_ALPHA_PRED_SEL_RRRR		(2 << 25) +#   define R500_INST_ALPHA_PRED_SEL_GGGG		(3 << 25) +#   define R500_INST_ALPHA_PRED_SEL_BBBB		(4 << 25) +#   define R500_INST_ALPHA_PRED_SEL_AAAA		(5 << 25) +/* XXX next four are kind of guessed */ +#   define R500_INST_STAT_WE_R				(1 << 28) +#   define R500_INST_STAT_WE_G				(1 << 29) +#   define R500_INST_STAT_WE_B				(1 << 30) +#   define R500_INST_STAT_WE_A				(1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR				0x4630 +#   define R500_US_CODE_START_ADDR(x)			(x << 0) +#   define R500_US_CODE_END_ADDR(x)			(x << 16) +#define R500_US_CODE_OFFSET				0x4638 +#   define R500_US_CODE_OFFSET_ADDR(x)			(x << 0) +#define R500_US_CODE_RANGE				0x4634 +#   define R500_US_CODE_RANGE_ADDR(x)			(x << 0) +#   define R500_US_CODE_RANGE_SIZE(x)			(x << 16) +#define R500_US_CONFIG					0x4600 +#   define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO		(1 << 1) +#define R500_US_FC_ADDR_0				0xa000 +#   define R500_FC_BOOL_ADDR(x)				(x << 0) +#   define R500_FC_INT_ADDR(x)				(x << 8) +#   define R500_FC_JUMP_ADDR(x)				(x << 16) +#   define R500_FC_JUMP_GLOBAL				(1 << 31) +#define R500_US_FC_BOOL_CONST				0x4620 +#   define R500_FC_KBOOL(x)				(x) +#define R500_US_FC_CTRL					0x4624 +#   define R500_FC_TEST_EN				(1 << 30) +#   define R500_FC_FULL_FC_EN				(1 << 31) +#define R500_US_FC_INST_0				0x9800 +#   define R500_FC_OP_JUMP				(0 << 0) +#   define R500_FC_OP_LOOP				(1 << 0) +#   define R500_FC_OP_ENDLOOP				(2 << 0) +#   define 
R500_FC_OP_REP				(3 << 0) +#   define R500_FC_OP_ENDREP				(4 << 0) +#   define R500_FC_OP_BREAKLOOP				(5 << 0) +#   define R500_FC_OP_BREAKREP				(6 << 0) +#   define R500_FC_OP_CONTINUE				(7 << 0) +#   define R500_FC_B_ELSE				(1 << 4) +#   define R500_FC_JUMP_ANY				(1 << 5) +#   define R500_FC_A_OP_NONE				(0 << 6) +#   define R500_FC_A_OP_POP				(1 << 6) +#   define R500_FC_A_OP_PUSH				(2 << 6) +#   define R500_FC_JUMP_FUNC(x)				(x << 8) +#   define R500_FC_B_POP_CNT(x)				(x << 16) +#   define R500_FC_B_OP0_NONE				(0 << 24) +#   define R500_FC_B_OP0_DECR				(1 << 24) +#   define R500_FC_B_OP0_INCR				(2 << 24) +#   define R500_FC_B_OP1_DECR				(0 << 26) +#   define R500_FC_B_OP1_NONE				(1 << 26) +#   define R500_FC_B_OP1_INCR				(2 << 26) +#   define R500_FC_IGNORE_UNCOVERED			(1 << 28) +#define R500_US_FC_INT_CONST_0				0x4c00 +#   define R500_FC_INT_CONST_KR(x)			(x << 0) +#   define R500_FC_INT_CONST_KG(x)			(x << 8) +#   define R500_FC_INT_CONST_KB(x)			(x << 16) +/* _0 through _15 */ +#define R500_US_FORMAT0_0				0x4640 +#   define R500_FORMAT_TXWIDTH(x)			(x << 0) +#   define R500_FORMAT_TXHEIGHT(x)			(x << 11) +#   define R500_FORMAT_TXDEPTH(x)			(x << 22) +/* _0 through _3 */ +#define R500_US_OUT_FMT_0				0x46A4 +#   define R500_OUT_FMT_C4_8				(0 << 0) +#   define R500_OUT_FMT_C4_10				(1 << 0) +#   define R500_OUT_FMT_C4_10_GAMMA			(2 << 0) +#   define R500_OUT_FMT_C_16				(3 << 0) +#   define R500_OUT_FMT_C2_16				(4 << 0) +#   define R500_OUT_FMT_C4_16				(5 << 0) +#   define R500_OUT_FMT_C_16_MPEG			(6 << 0) +#   define R500_OUT_FMT_C2_16_MPEG			(7 << 0) +#   define R500_OUT_FMT_C2_4				(8 << 0) +#   define R500_OUT_FMT_C_3_3_2				(9 << 0) +#   define R500_OUT_FMT_C_6_5_6				(10 << 0) +#   define R500_OUT_FMT_C_11_11_10			(11 << 0) +#   define R500_OUT_FMT_C_10_11_11			(12 << 0) +#   define R500_OUT_FMT_C_2_10_10_10			(13 << 0) +/* #define R500_OUT_FMT_RESERVED			(14 << 0) */ +#   define R500_OUT_FMT_UNUSED				(15 << 0) +#   define R500_OUT_FMT_C_16_FP				(16 << 0) +#   define R500_OUT_FMT_C2_16_FP			(17 << 0) +#   define R500_OUT_FMT_C4_16_FP			(18 << 0) +#   define R500_OUT_FMT_C_32_FP				(19 << 0) +#   define R500_OUT_FMT_C2_32_FP			(20 << 0) +#   define R500_OUT_FMT_C4_32_FP			(21 << 0) +#   define R500_C0_SEL_A				(0 << 8) +#   define R500_C0_SEL_R				(1 << 8) +#   define R500_C0_SEL_G				(2 << 8) +#   define R500_C0_SEL_B				(3 << 8) +#   define R500_C1_SEL_A				(0 << 10) +#   define R500_C1_SEL_R				(1 << 10) +#   define R500_C1_SEL_G				(2 << 10) +#   define R500_C1_SEL_B				(3 << 10) +#   define R500_C2_SEL_A				(0 << 12) +#   define R500_C2_SEL_R				(1 << 12) +#   define R500_C2_SEL_G				(2 << 12) +#   define R500_C2_SEL_B				(3 << 12) +#   define R500_C3_SEL_A				(0 << 14) +#   define R500_C3_SEL_R				(1 << 14) +#   define R500_C3_SEL_G				(2 << 14) +#   define R500_C3_SEL_B				(3 << 14) +#   define R500_OUT_SIGN(x)				(x << 16) +#   define R500_ROUND_ADJ				(1 << 20) +#define R500_US_PIXSIZE					0x4604 +#   define R500_PIX_SIZE(x)				(x) +#define R500_US_TEX_ADDR_0				0x9800 +#   define R500_TEX_SRC_ADDR(x)				(x << 0) +#   define R500_TEX_SRC_ADDR_REL			(1 << 7) +#   define R500_TEX_SRC_S_SWIZ_R			(0 << 8) +#   define R500_TEX_SRC_S_SWIZ_G			(1 << 8) +#   define R500_TEX_SRC_S_SWIZ_B			(2 << 8) +#   define R500_TEX_SRC_S_SWIZ_A			(3 << 8) +#   define R500_TEX_SRC_T_SWIZ_R			(0 << 10) +#   define R500_TEX_SRC_T_SWIZ_G			(1 << 10) +#   define R500_TEX_SRC_T_SWIZ_B			(2 << 10) +#   define R500_TEX_SRC_T_SWIZ_A			(3 << 10) +#   define 
R500_TEX_SRC_R_SWIZ_R			(0 << 12) +#   define R500_TEX_SRC_R_SWIZ_G			(1 << 12) +#   define R500_TEX_SRC_R_SWIZ_B			(2 << 12) +#   define R500_TEX_SRC_R_SWIZ_A			(3 << 12) +#   define R500_TEX_SRC_Q_SWIZ_R			(0 << 14) +#   define R500_TEX_SRC_Q_SWIZ_G			(1 << 14) +#   define R500_TEX_SRC_Q_SWIZ_B			(2 << 14) +#   define R500_TEX_SRC_Q_SWIZ_A			(3 << 14) +#   define R500_TEX_DST_ADDR(x)				(x << 16) +#   define R500_TEX_DST_ADDR_REL			(1 << 23) +#   define R500_TEX_DST_R_SWIZ_R			(0 << 24) +#   define R500_TEX_DST_R_SWIZ_G			(1 << 24) +#   define R500_TEX_DST_R_SWIZ_B			(2 << 24) +#   define R500_TEX_DST_R_SWIZ_A			(3 << 24) +#   define R500_TEX_DST_G_SWIZ_R			(0 << 26) +#   define R500_TEX_DST_G_SWIZ_G			(1 << 26) +#   define R500_TEX_DST_G_SWIZ_B			(2 << 26) +#   define R500_TEX_DST_G_SWIZ_A			(3 << 26) +#   define R500_TEX_DST_B_SWIZ_R			(0 << 28) +#   define R500_TEX_DST_B_SWIZ_G			(1 << 28) +#   define R500_TEX_DST_B_SWIZ_B			(2 << 28) +#   define R500_TEX_DST_B_SWIZ_A			(3 << 28) +#   define R500_TEX_DST_A_SWIZ_R			(0 << 30) +#   define R500_TEX_DST_A_SWIZ_G			(1 << 30) +#   define R500_TEX_DST_A_SWIZ_B			(2 << 30) +#   define R500_TEX_DST_A_SWIZ_A			(3 << 30) +#define R500_US_TEX_ADDR_DXDY_0				0xa000 +#   define R500_DX_ADDR(x)				(x << 0) +#   define R500_DX_ADDR_REL				(1 << 7) +#   define R500_DX_S_SWIZ_R				(0 << 8) +#   define R500_DX_S_SWIZ_G				(1 << 8) +#   define R500_DX_S_SWIZ_B				(2 << 8) +#   define R500_DX_S_SWIZ_A				(3 << 8) +#   define R500_DX_T_SWIZ_R				(0 << 10) +#   define R500_DX_T_SWIZ_G				(1 << 10) +#   define R500_DX_T_SWIZ_B				(2 << 10) +#   define R500_DX_T_SWIZ_A				(3 << 10) +#   define R500_DX_R_SWIZ_R				(0 << 12) +#   define R500_DX_R_SWIZ_G				(1 << 12) +#   define R500_DX_R_SWIZ_B				(2 << 12) +#   define R500_DX_R_SWIZ_A				(3 << 12) +#   define R500_DX_Q_SWIZ_R				(0 << 14) +#   define R500_DX_Q_SWIZ_G				(1 << 14) +#   define R500_DX_Q_SWIZ_B				(2 << 14) +#   define R500_DX_Q_SWIZ_A				(3 << 14) +#   define R500_DY_ADDR(x)				(x << 16) +#   define R500_DY_ADDR_REL				(1 << 17) +#   define R500_DY_S_SWIZ_R				(0 << 24) +#   define R500_DY_S_SWIZ_G				(1 << 24) +#   define R500_DY_S_SWIZ_B				(2 << 24) +#   define R500_DY_S_SWIZ_A				(3 << 24) +#   define R500_DY_T_SWIZ_R				(0 << 26) +#   define R500_DY_T_SWIZ_G				(1 << 26) +#   define R500_DY_T_SWIZ_B				(2 << 26) +#   define R500_DY_T_SWIZ_A				(3 << 26) +#   define R500_DY_R_SWIZ_R				(0 << 28) +#   define R500_DY_R_SWIZ_G				(1 << 28) +#   define R500_DY_R_SWIZ_B				(2 << 28) +#   define R500_DY_R_SWIZ_A				(3 << 28) +#   define R500_DY_Q_SWIZ_R				(0 << 30) +#   define R500_DY_Q_SWIZ_G				(1 << 30) +#   define R500_DY_Q_SWIZ_B				(2 << 30) +#   define R500_DY_Q_SWIZ_A				(3 << 30) +#define R500_US_TEX_INST_0				0x9000 +#   define R500_TEX_ID(x)				(x << 16) +#   define R500_TEX_INST_NOP				(0 << 22) +#   define R500_TEX_INST_LD				(1 << 22) +#   define R500_TEX_INST_TEXKILL			(2 << 22) +#   define R500_TEX_INST_PROJ				(3 << 22) +#   define R500_TEX_INST_LODBIAS			(4 << 22) +#   define R500_TEX_INST_LOD				(5 << 22) +#   define R500_TEX_INST_DXDY				(6 << 22) +#   define R500_TEX_SEM_ACQUIRE				(1 << 25) +#   define R500_TEX_IGNORE_UNCOVERED			(1 << 26) +#   define R500_TEX_UNSCALED				(1 << 27) +#define R300_US_W_FMT					0x46b4 +#   define R300_W_FMT_W0				(0 << 0) +#   define R300_W_FMT_W24				(1 << 0) +#   define R300_W_FMT_W24FP				(2 << 0) +#   define R300_W_SRC_US				(0 << 2) +#   define R300_W_SRC_RAS				(1 << 2) + + +/* Draw a primitive from vertex data in arrays loaded via 
3D_LOAD_VBPNTR. + * Two parameter dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + */ +#define R300_PACKET3_3D_DRAW_VBUF           0x00002800 + +/* Draw a primitive from immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_IMMD           0x00002900 + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and + * immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_INDX           0x00002A00 + + +/* Specify the full set of vertex arrays as (address, stride). + * The first parameter is the number of vertex arrays specified. + * The rest of the command is a variable length list of blocks, where + * each block is three dwords long and specifies two arrays. + * The first dword of a block is split into two words, the lower significant + * word refers to the first array, the more significant word to the second + * array in the block. + * The low byte of each word contains the size of an array entry in dwords, + * the high byte contains the stride of the array. + * The second dword of a block contains the pointer to the first array, + * the third dword of a block contains the pointer to the second array. + * Note that if the total number of arrays is odd, the third dword of + * the last block is omitted. + */ +#define R300_PACKET3_3D_LOAD_VBPNTR         0x00002F00 + +#define R300_PACKET3_INDX_BUFFER            0x00003300 +#    define R300_EB_UNK1_SHIFT                      24 +#    define R300_EB_UNK1                    (0x80<<24) +#    define R300_EB_UNK2                        0x0810 + +/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_VBUF_2         0x00003400 +/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_IMMD_2         0x00003500 +/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_INDX_2         0x00003600 + +/* Clears a portion of hierachical Z RAM + * 3 dword parameters + * 0. START + * 1. COUNT: 13:0 (max is 0x3FFF) + * 2. CLEAR_VALUE: Value to write into HIZ RAM. + */ +#define R300_PACKET3_3D_CLEAR_HIZ           0x00003700 + +/* Draws a set of primitives using vertex buffers pointed by the state data. + * At least 2 Parameters: + * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword. 
+ * 2 to end: Data or indices (see other 3D_DRAW_* packets for details) + */ +#define R300_PACKET3_3D_DRAW_128            0x00003900 + +/* END: Packet 3 commands */ + + +/* Color formats for 2d packets + */ +#define R300_CP_COLOR_FORMAT_CI8	2 +#define R300_CP_COLOR_FORMAT_ARGB1555	3 +#define R300_CP_COLOR_FORMAT_RGB565	4 +#define R300_CP_COLOR_FORMAT_ARGB8888	6 +#define R300_CP_COLOR_FORMAT_RGB332	7 +#define R300_CP_COLOR_FORMAT_RGB8	9 +#define R300_CP_COLOR_FORMAT_ARGB4444	15 + +/* + * CP type-3 packets + */ +#define R300_CP_CMD_BITBLT_MULTI	0xC0009B00 + +/* XXX Corbin's stuff from radeon and r200 */ + +#define RADEON_WAIT_UNTIL                   0x1720 +#       define RADEON_WAIT_CRTC_PFLIP       (1 << 0) +#       define RADEON_WAIT_2D_IDLECLEAN     (1 << 16) +#       define RADEON_WAIT_3D_IDLECLEAN     (1 << 17) +#       define RADEON_WAIT_HOST_IDLECLEAN   (1 << 18) + +#define RADEON_CP_PACKET3                           0xC0000000 + +#define R200_3D_DRAW_IMMD_2      0xC0003500 + +#endif /* _R300_REG_H */ + +/* *INDENT-ON* */ + +/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c new file mode 100644 index 0000000000..8ed66a1660 --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen.c @@ -0,0 +1,271 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_screen.h" + +/* Return the identifier behind whom the brave coders responsible for this + * amalgamation of code, sweat, and duct tape, routinely obscure their names. + * + * ...I should have just put "Corbin Simpson", but I'm not that cool. + * + * (Or egotistical. Yet.) 
*/ +static const char* r300_get_vendor(struct pipe_screen* pscreen) +{ +    return "X.Org R300 Project"; +} + +static const char* chip_families[] = { +    "R300", +    "R350", +    "R360", +    "RV350", +    "RV370", +    "RV380", +    "R420", +    "R423", +    "R430", +    "R480", +    "R481", +    "RV410", +    "RS400", +    "RC410", +    "RS480", +    "RS482", +    "RS690", +    "RS740", +    "RV515", +    "R520", +    "RV530", +    "R580", +    "RV560", +    "RV570" +}; + +static const char* r300_get_name(struct pipe_screen* pscreen) +{ +    struct r300_screen* r300screen = r300_screen(pscreen); + +    return chip_families[r300screen->caps->family]; +} + +static int r300_get_param(struct pipe_screen* pscreen, int param) +{ +    struct r300_screen* r300screen = r300_screen(pscreen); + +    switch (param) { +        /* XXX cases marked "IN THEORY" are possible on the hardware, +         * but haven't been implemented yet. */ +        case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +            /* XXX I'm told this goes up to 16 */ +            return 8; +        case PIPE_CAP_NPOT_TEXTURES: +            /* IN THEORY */ +            return 0; +        case PIPE_CAP_TWO_SIDED_STENCIL: +            if (r300screen->caps->is_r500) { +                return 1; +            } else { +                return 0; +            } +            return 0; +        case PIPE_CAP_GLSL: +            /* IN THEORY */ +            return 0; +        case PIPE_CAP_S3TC: +            /* IN THEORY */ +            return 0; +        case PIPE_CAP_ANISOTROPIC_FILTER: +            /* IN THEORY */ +            return 0; +        case PIPE_CAP_POINT_SPRITE: +            /* IN THEORY */ +            return 0; +        case PIPE_CAP_MAX_RENDER_TARGETS: +            /* XXX 4 eventually */ +            return 1; +        case PIPE_CAP_OCCLUSION_QUERY: +            /* IN THEORY */ +            return 0; +        case PIPE_CAP_TEXTURE_SHADOW_MAP: +            /* IN THEORY */ +            return 0; +        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +            if (r300screen->caps->is_r500) { +                /* 13 == 4096x4096 */ +                return 13; +            } else { +                /* 12 == 2048x2048 */ +                return 12; +            } +        case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +            /* So, technically, the limit is the same as above, but some math +             * shows why this is silly. Assuming RGBA, 4cpp, we can see that +             * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly +             * practical. However, if at some point a game really wants this, +             * then we can remove this limit. 
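+             * (For the record, 4096*4096*4096 is 64 Gi texels; at 4 bytes
+             * per texel that works out to 256 GiB of texture data.)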
*/ +            if (r300screen->caps->is_r500) { +                /* 9 == 256x256x256 */ +                return 9; +            } else { +                /* 8 == 128*128*128 */ +                return 8; +            } +        case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +            if (r300screen->caps->is_r500) { +                /* 13 == 4096x4096 */ +                return 13; +            } else { +                /* 12 == 2048x2048 */ +                return 12; +            } +        case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +            return 1; +        case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +            return 1; +        case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +            /* XXX guessing */ +            return 2; +        default: +            debug_printf("r300: Implementation error: Bad param %d\n", +                param); +            return 0; +    } +} + +static float r300_get_paramf(struct pipe_screen* pscreen, int param) +{ +    switch (param) { +        case PIPE_CAP_MAX_LINE_WIDTH: +        case PIPE_CAP_MAX_LINE_WIDTH_AA: +            /* XXX this is the biggest thing that will fit in that register. +            * Perhaps the actual rendering limits are less? */ +            return 10922.0f; +        case PIPE_CAP_MAX_POINT_WIDTH: +        case PIPE_CAP_MAX_POINT_WIDTH_AA: +            /* XXX this is the biggest thing that will fit in that register. +             * Perhaps the actual rendering limits are less? */ +            return 10922.0f; +        case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +            return 16.0f; +        case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +            return 16.0f; +        default: +            debug_printf("r300: Implementation error: Bad paramf %d\n", +                param); +            return 0.0f; +    } +} + +/* XXX moar formats */ +static boolean check_tex_2d_format(enum pipe_format format) +{ +    switch (format) { +        case PIPE_FORMAT_A8R8G8B8_UNORM: +        case PIPE_FORMAT_I8_UNORM: +            return TRUE; +        default: +            debug_printf("r300: Warning: Got unknown format: %s, in %s\n", +                pf_name(format), __FUNCTION__); +            break; +    } + +    return FALSE; +} + +/* XXX moar targets */ +static boolean r300_is_format_supported(struct pipe_screen* pscreen, +                                        enum pipe_format format, +                                        enum pipe_texture_target target, +                                        unsigned tex_usage, +                                        unsigned geom_flags) +{ +    switch (target) { +        case PIPE_TEXTURE_2D: +            return check_tex_2d_format(format); +        default: +            debug_printf("r300: Warning: Got unknown format target: %d\n", +                format); +            break; +    } + +    return FALSE; +} + +static void* r300_surface_map(struct pipe_screen* screen, +                              struct pipe_surface* surface, +                              unsigned flags) +{ +    struct r300_texture* tex = (struct r300_texture*)surface->texture; +    char* map = pipe_buffer_map(screen, tex->buffer, flags); + +    if (!map) { +        return NULL; +    } + +    return map + surface->offset; +} + +static void r300_surface_unmap(struct pipe_screen* screen, +                               struct pipe_surface* surface) +{ +    struct r300_texture* tex = (struct r300_texture*)surface->texture; +    pipe_buffer_unmap(screen, tex->buffer); +} + +static void r300_destroy_screen(struct pipe_screen* pscreen) +{ +    struct 
r300_screen* r300screen = r300_screen(pscreen); + +    FREE(r300screen->caps); +    FREE(r300screen); +} + +struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys, +                                       struct r300_winsys* r300_winsys) +{ +    struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); +    struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); + +    if (!r300screen || !caps) +        return NULL; + +    caps->pci_id = r300_winsys->pci_id; +    caps->num_frag_pipes = r300_winsys->gb_pipes; + +    r300_parse_chipset(caps); + +    r300screen->caps = caps; +    r300screen->screen.winsys = winsys; +    r300screen->screen.destroy = r300_destroy_screen; +    r300screen->screen.get_name = r300_get_name; +    r300screen->screen.get_vendor = r300_get_vendor; +    r300screen->screen.get_param = r300_get_param; +    r300screen->screen.get_paramf = r300_get_paramf; +    r300screen->screen.is_format_supported = r300_is_format_supported; +    r300screen->screen.surface_map = r300_surface_map; +    r300screen->screen.surface_unmap = r300_surface_unmap; + +    r300_init_screen_texture_functions(&r300screen->screen); +    u_simple_screen_init(&r300screen->screen); + +    return &r300screen->screen; +} diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h new file mode 100644 index 0000000000..2e25f61dbf --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen.h @@ -0,0 +1,52 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SCREEN_H +#define R300_SCREEN_H + +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_simple_screen.h" + +#include "r300_chipset.h" +#include "r300_texture.h" +#include "r300_winsys.h" + +struct r300_screen { +    /* Parent class */ +    struct pipe_screen screen; + +    /* Chipset capabilities */ +    struct r300_capabilities* caps; +}; + +/* Convenience cast wrapper. */ +static struct r300_screen* r300_screen(struct pipe_screen* screen) { +    return (struct r300_screen*)screen; +} + +/* Creates a new r300 screen. 
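+ *
+ * A winsys would typically call this once while bringing up the device; as a
+ * sketch (assuming the winsys objects have already been created):
+ *
+ *   struct pipe_screen* screen = r300_create_screen(winsys, r300_winsys);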
*/ +struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys, +                                       struct r300_winsys* r300_winsys); + +#endif /* R300_SCREEN_H */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c new file mode 100644 index 0000000000..9392d72342 --- /dev/null +++ b/src/gallium/drivers/r300/r300_state.c @@ -0,0 +1,826 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "util/u_math.h" +#include "util/u_pack_color.h" +#include "pipe/p_debug.h" + +#include "r300_context.h" +#include "r300_reg.h" + +/* r300_state: Functions used to intialize state context by translating + * Gallium state objects into semi-native r300 state objects. + * + * XXX break this file up into pieces if it gets too big! */ + +/* Pack a float into a dword. */ +static uint32_t pack_float_32(float f) +{ +    union { +        float f; +        uint32_t u; +    } u; + +    u.f = f; +    return u.u; +} + +static uint32_t translate_blend_function(int blend_func) { +    switch (blend_func) { +        case PIPE_BLEND_ADD: +            return R300_COMB_FCN_ADD_CLAMP; +        case PIPE_BLEND_SUBTRACT: +            return R300_COMB_FCN_SUB_CLAMP; +        case PIPE_BLEND_REVERSE_SUBTRACT: +            return R300_COMB_FCN_RSUB_CLAMP; +        case PIPE_BLEND_MIN: +            return R300_COMB_FCN_MIN; +        case PIPE_BLEND_MAX: +            return R300_COMB_FCN_MAX; +        default: +            debug_printf("r300: Unknown blend function %d\n", blend_func); +            break; +    } +    return 0; +} + +/* XXX we can also offer the D3D versions of some of these... 
*/ +static uint32_t translate_blend_factor(int blend_fact) { +    switch (blend_fact) { +        case PIPE_BLENDFACTOR_ONE: +            return R300_BLEND_GL_ONE; +        case PIPE_BLENDFACTOR_SRC_COLOR: +            return R300_BLEND_GL_SRC_COLOR; +        case PIPE_BLENDFACTOR_SRC_ALPHA: +            return R300_BLEND_GL_SRC_ALPHA; +        case PIPE_BLENDFACTOR_DST_ALPHA: +            return R300_BLEND_GL_DST_ALPHA; +        case PIPE_BLENDFACTOR_DST_COLOR: +            return R300_BLEND_GL_DST_COLOR; +        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +            return R300_BLEND_GL_SRC_ALPHA_SATURATE; +        case PIPE_BLENDFACTOR_CONST_COLOR: +            return R300_BLEND_GL_CONST_COLOR; +        case PIPE_BLENDFACTOR_CONST_ALPHA: +            return R300_BLEND_GL_CONST_ALPHA; +        /* XXX WTF are these? +        case PIPE_BLENDFACTOR_SRC1_COLOR: +        case PIPE_BLENDFACTOR_SRC1_ALPHA: */ +        case PIPE_BLENDFACTOR_ZERO: +            return R300_BLEND_GL_ZERO; +        case PIPE_BLENDFACTOR_INV_SRC_COLOR: +            return R300_BLEND_GL_ONE_MINUS_SRC_COLOR; +        case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +            return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; +        case PIPE_BLENDFACTOR_INV_DST_ALPHA: +            return R300_BLEND_GL_ONE_MINUS_DST_ALPHA; +        case PIPE_BLENDFACTOR_INV_DST_COLOR: +            return R300_BLEND_GL_ONE_MINUS_DST_COLOR; +        case PIPE_BLENDFACTOR_INV_CONST_COLOR: +            return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; +        case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +            return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; +        /* XXX see above +        case PIPE_BLENDFACTOR_INV_SRC1_COLOR: +        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ +        default: +            debug_printf("r300: Unknown blend factor %d\n", blend_fact); +            break; +    } +    return 0; +} + +/* Create a new blend state based on the CSO blend state. + * + * This encompasses alpha blending, logic/raster ops, and blend dithering. */ +static void* r300_create_blend_state(struct pipe_context* pipe, +                                     const struct pipe_blend_state* state) +{ +    struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); + +    if (state->blend_enable) { +        /* XXX for now, always do separate alpha... +         * is it faster to do it with one reg? */ +        blend->blend_control = R300_ALPHA_BLEND_ENABLE | +                R300_SEPARATE_ALPHA_ENABLE | +                R300_READ_ENABLE | +                translate_blend_function(state->rgb_func) | +                (translate_blend_factor(state->rgb_src_factor) << +                    R300_SRC_BLEND_SHIFT) | +                (translate_blend_factor(state->rgb_dst_factor) << +                    R300_DST_BLEND_SHIFT); +        blend->alpha_blend_control = +                translate_blend_function(state->alpha_func) | +                (translate_blend_factor(state->alpha_src_factor) << +                    R300_SRC_BLEND_SHIFT) | +                (translate_blend_factor(state->alpha_dst_factor) << +                    R300_DST_BLEND_SHIFT); +    } + +    /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ +    /* XXX are logicops still allowed if blending's disabled? +     * Does Gallium take care of it for us? 
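+     *
+     * Since no translation is needed, the dword below is simply the Gallium
+     * enum shifted into place; e.g. PIPE_LOGICOP_XOR would yield
+     * R300_RB3D_ROPCNTL_ROP_ENABLE |
+     * (PIPE_LOGICOP_XOR << R300_RB3D_ROPCNTL_ROP_SHIFT).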
*/ +    if (state->logicop_enable) { +        blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE | +                (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; +    } + +    if (state->dither) { +        blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | +                R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT; +    } + +    return (void*)blend; +} + +/* Bind blend state. */ +static void r300_bind_blend_state(struct pipe_context* pipe, +                                  void* state) +{ +    struct r300_context* r300 = r300_context(pipe); + +    r300->blend_state = (struct r300_blend_state*)state; +    r300->dirty_state |= R300_NEW_BLEND; +} + +/* Free blend state. */ +static void r300_delete_blend_state(struct pipe_context* pipe, +                                    void* state) +{ +    FREE(state); +} + +/* Set blend color. + * Setup both R300 and R500 registers, figure out later which one to write. */ +static void r300_set_blend_color(struct pipe_context* pipe, +                                 const struct pipe_blend_color* color) +{ +    struct r300_context* r300 = r300_context(pipe); +    uint32_t r, g, b, a; +    ubyte ur, ug, ub, ua; + +    r = util_iround(color->color[0] * 1023.0f); +    g = util_iround(color->color[1] * 1023.0f); +    b = util_iround(color->color[2] * 1023.0f); +    a = util_iround(color->color[3] * 1023.0f); + +    ur = float_to_ubyte(color->color[0]); +    ug = float_to_ubyte(color->color[1]); +    ub = float_to_ubyte(color->color[2]); +    ua = float_to_ubyte(color->color[3]); + +    r300->blend_color_state->blend_color = (a << 24) | (r << 16) | (g << 8) | b; + +    r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16); +    r300->blend_color_state->blend_color_green_blue = ub | (ug << 16); + +    r300->dirty_state |= R300_NEW_BLEND_COLOR; +} + +static void r300_set_clip_state(struct pipe_context* pipe, +                                const struct pipe_clip_state* state) +{ +    struct r300_context* r300 = r300_context(pipe); +    /* XXX Draw */ +    draw_flush(r300->draw); +    draw_set_clip_state(r300->draw, state); +} + +static void +    r300_set_constant_buffer(struct pipe_context* pipe, +                             uint shader, uint index, +                             const struct pipe_constant_buffer* buffer) +{ +    /* XXX */ +} + +static uint32_t translate_depth_stencil_function(int zs_func) { +    switch (zs_func) { +        case PIPE_FUNC_NEVER: +            return R300_ZS_NEVER; +        case PIPE_FUNC_LESS: +            return R300_ZS_LESS; +        case PIPE_FUNC_EQUAL: +            return R300_ZS_EQUAL; +        case PIPE_FUNC_LEQUAL: +            return R300_ZS_LEQUAL; +        case PIPE_FUNC_GREATER: +            return R300_ZS_GREATER; +        case PIPE_FUNC_NOTEQUAL: +            return R300_ZS_NOTEQUAL; +        case PIPE_FUNC_GEQUAL: +            return R300_ZS_GEQUAL; +        case PIPE_FUNC_ALWAYS: +            return R300_ZS_ALWAYS; +        default: +            debug_printf("r300: Unknown depth/stencil function %d\n", +                zs_func); +            break; +    } +    return 0; +} + +static uint32_t translate_stencil_op(int s_op) { +    switch (s_op) { +        case PIPE_STENCIL_OP_KEEP: +            return R300_ZS_KEEP; +        case PIPE_STENCIL_OP_ZERO: +            return R300_ZS_ZERO; +        case PIPE_STENCIL_OP_REPLACE: +            return R300_ZS_REPLACE; +        case PIPE_STENCIL_OP_INCR: +            return R300_ZS_INCR; +        case PIPE_STENCIL_OP_DECR: +            return R300_ZS_DECR; +    
    case PIPE_STENCIL_OP_INCR_WRAP: +            return R300_ZS_INCR_WRAP; +        case PIPE_STENCIL_OP_DECR_WRAP: +            return R300_ZS_DECR_WRAP; +        case PIPE_STENCIL_OP_INVERT: +            return R300_ZS_INVERT; +        default: +            debug_printf("r300: Unknown stencil op %d", s_op); +            break; +    } +    return 0; +} + +static uint32_t translate_alpha_function(int alpha_func) { +    switch (alpha_func) { +        case PIPE_FUNC_NEVER: +            return R300_FG_ALPHA_FUNC_NEVER; +        case PIPE_FUNC_LESS: +            return R300_FG_ALPHA_FUNC_LESS; +        case PIPE_FUNC_EQUAL: +            return R300_FG_ALPHA_FUNC_EQUAL; +        case PIPE_FUNC_LEQUAL: +            return R300_FG_ALPHA_FUNC_LE; +        case PIPE_FUNC_GREATER: +            return R300_FG_ALPHA_FUNC_GREATER; +        case PIPE_FUNC_NOTEQUAL: +            return R300_FG_ALPHA_FUNC_NOTEQUAL; +        case PIPE_FUNC_GEQUAL: +            return R300_FG_ALPHA_FUNC_GE; +        case PIPE_FUNC_ALWAYS: +            return R300_FG_ALPHA_FUNC_ALWAYS; +        default: +            debug_printf("r300: Unknown alpha function %d", alpha_func); +            break; +    } +    return 0; +} + +/* Create a new depth, stencil, and alpha state based on the CSO dsa state. + * + * This contains the depth buffer, stencil buffer, alpha test, and such. + * On the Radeon, depth and stencil buffer setup are intertwined, which is + * the reason for some of the strange-looking assignments across registers. */ +static void* +        r300_create_dsa_state(struct pipe_context* pipe, +                              const struct pipe_depth_stencil_alpha_state* state) +{ +    struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); + +    /* Depth test setup. */ +    if (state->depth.enabled) { +        dsa->z_buffer_control |= R300_Z_ENABLE; + +        if (state->depth.writemask) { +            dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; +        } + +        dsa->z_stencil_control |= +            (translate_depth_stencil_function(state->depth.func) << +                R300_Z_FUNC_SHIFT); +    } + +    /* Stencil buffer setup. 
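+     *
+     * Front- and back-face state share the single z_stencil_control dword:
+     * the front face is packed with the R300_S_FRONT_* shifts and the back
+     * face with the R300_S_BACK_* shifts, with R300_STENCIL_FRONT_BACK
+     * enabling the two-sided path.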
*/ +    if (state->stencil[0].enabled) { +        dsa->z_buffer_control |= R300_STENCIL_ENABLE; +        dsa->z_stencil_control |= +                (translate_depth_stencil_function(state->stencil[0].func) << +                    R300_S_FRONT_FUNC_SHIFT) | +                (translate_stencil_op(state->stencil[0].fail_op) << +                    R300_S_FRONT_SFAIL_OP_SHIFT) | +                (translate_stencil_op(state->stencil[0].zpass_op) << +                    R300_S_FRONT_ZPASS_OP_SHIFT) | +                (translate_stencil_op(state->stencil[0].zfail_op) << +                    R300_S_FRONT_ZFAIL_OP_SHIFT); + +        dsa->stencil_ref_mask = (state->stencil[0].ref_value) | +                (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) | +                (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT); + +        if (state->stencil[1].enabled) { +            dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK; +            dsa->z_stencil_control |= +                (translate_depth_stencil_function(state->stencil[1].func) << +                    R300_S_BACK_FUNC_SHIFT) | +                (translate_stencil_op(state->stencil[1].fail_op) << +                    R300_S_BACK_SFAIL_OP_SHIFT) | +                (translate_stencil_op(state->stencil[1].zpass_op) << +                    R300_S_BACK_ZPASS_OP_SHIFT) | +                (translate_stencil_op(state->stencil[1].zfail_op) << +                    R300_S_BACK_ZFAIL_OP_SHIFT); + +            dsa->stencil_ref_bf = (state->stencil[1].ref_value) | +                (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | +                (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT); +        } +    } + +    /* Alpha test setup. */ +    if (state->alpha.enabled) { +        dsa->alpha_function = translate_alpha_function(state->alpha.func) | +            R300_FG_ALPHA_FUNC_ENABLE; +        dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, +                                     0, 1023); +    } else { +        dsa->z_buffer_top = R300_ZTOP_ENABLE; +    } + +    return (void*)dsa; +} + +/* Bind DSA state. */ +static void r300_bind_dsa_state(struct pipe_context* pipe, +                                void* state) +{ +    struct r300_context* r300 = r300_context(pipe); + +    r300->dsa_state = (struct r300_dsa_state*)state; +    r300->dirty_state |= R300_NEW_DSA; +} + +/* Free DSA state. */ +static void r300_delete_dsa_state(struct pipe_context* pipe, +                                  void* state) +{ +    FREE(state); +} + +static void r300_set_edgeflags(struct pipe_context* pipe, +                               const unsigned* bitfield) +{ +    /* XXX you know it's bad when i915 has this blank too */ +} + +static void +    r300_set_framebuffer_state(struct pipe_context* pipe, +                               const struct pipe_framebuffer_state* state) +{ +    struct r300_context* r300 = r300_context(pipe); + +    draw_flush(r300->draw); + +    r300->framebuffer_state = *state; + +    r300->dirty_state |= R300_NEW_FRAMEBUFFERS; +} + +/* Create fragment shader state. 
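 * Nothing is compiled at create time: the CSO is simply copied, the R300
 * vs. R500 container is picked from the screen caps, and translation to
 * hardware code is deferred until the shader is first bound (see
 * r300_bind_fs_state below).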
*/ +static void* r300_create_fs_state(struct pipe_context* pipe, +                                  const struct pipe_shader_state* shader) +{ +    struct r300_context* r300 = r300_context(pipe); +    struct r3xx_fragment_shader* fs = NULL; + +    if (r300_screen(r300->context.screen)->caps->is_r500) { +        fs = +            (struct r3xx_fragment_shader*)CALLOC_STRUCT(r500_fragment_shader); +    } else { +        fs = +            (struct r3xx_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader); +    } + +    /* Copy state directly into shader. */ +    fs->state = *shader; + +    return (void*)fs; +} + +/* Bind fragment shader state. */ +static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) +{ +    struct r300_context* r300 = r300_context(pipe); +    struct r3xx_fragment_shader* fs = (struct r3xx_fragment_shader*)shader; + +    if (!fs->translated) { +        if (r300_screen(r300->context.screen)->caps->is_r500) { +            r500_translate_shader(r300, fs); +        } else { +            r300_translate_shader(r300, fs); +        } +    } + +    r300->fs = fs; + +    r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; +} + +/* Delete fragment shader state. */ +static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) +{ +    FREE(shader); +} + +static void r300_set_polygon_stipple(struct pipe_context* pipe, +                                     const struct pipe_poly_stipple* state) +{ +    /* XXX */ +} + +static INLINE int pack_float_16_6x(float f) { +    return ((int)(f * 6.0) & 0xffff); +} + +/* Create a new rasterizer state based on the CSO rasterizer state. + * + * This is a very large chunk of state, and covers most of the graphics + * backend (GB), geometry assembly (GA), and setup unit (SU) blocks. + * + * In a not entirely unironic sidenote, this state has nearly nothing to do + * with the actual block on the Radeon called the rasterizer (RS). */ +static void* r300_create_rs_state(struct pipe_context* pipe, +                                  const struct pipe_rasterizer_state* state) +{ +    struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); + +    /* XXX this is part of HW TCL */ +    /* XXX endian control */ +    rs->vap_control_status = R300_VAP_TCL_BYPASS; + +    rs->point_size = pack_float_16_6x(state->point_size) | +        (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); + +    rs->line_control = pack_float_16_6x(state->line_width) | +        R300_GA_LINE_CNTL_END_TYPE_COMP; + +    /* Radeons don't think in "CW/CCW", they think in "front/back". 
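 * Whichever winding the state tracker declares front-facing decides how
 * the CW/CCW polygon offset enables and the cull bits are remapped onto
 * the hardware's front and back faces below.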
*/ +    if (state->front_winding == PIPE_WINDING_CW) { +        rs->cull_mode = R300_FRONT_FACE_CW; + +        if (state->offset_cw) { +            rs->polygon_offset_enable |= R300_FRONT_ENABLE; +        } +        if (state->offset_ccw) { +            rs->polygon_offset_enable |= R300_BACK_ENABLE; +        } +    } else { +        rs->cull_mode = R300_FRONT_FACE_CCW; + +        if (state->offset_ccw) { +            rs->polygon_offset_enable |= R300_FRONT_ENABLE; +        } +        if (state->offset_cw) { +            rs->polygon_offset_enable |= R300_BACK_ENABLE; +        } +    } +    if (state->front_winding & state->cull_mode) { +        rs->cull_mode |= R300_CULL_FRONT; +    } +    if (~(state->front_winding) & state->cull_mode) { +        rs->cull_mode |= R300_CULL_BACK; +    } + +    if (rs->polygon_offset_enable) { +        rs->depth_offset_front = rs->depth_offset_back = +                pack_float_32(state->offset_units); +        rs->depth_scale_front = rs->depth_scale_back = +                pack_float_32(state->offset_scale); +    } + +    if (state->line_stipple_enable) { +        rs->line_stipple_config = +            R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE | +            (pack_float_32((float)state->line_stipple_factor) & +                R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK); +        /* XXX this might need to be scaled up */ +        rs->line_stipple_value = state->line_stipple_pattern; +    } + +    return (void*)rs; +} + +/* Bind rasterizer state. */ +static void r300_bind_rs_state(struct pipe_context* pipe, void* state) +{ +    struct r300_context* r300 = r300_context(pipe); + +    r300->rs_state = (struct r300_rs_state*)state; +    r300->dirty_state |= R300_NEW_RASTERIZER; +} + +/* Free rasterizer state. */ +static void r300_delete_rs_state(struct pipe_context* pipe, void* state) +{ +    FREE(state); +} + +static uint32_t translate_wrap(int wrap) { +    switch (wrap) { +        case PIPE_TEX_WRAP_REPEAT: +            return R300_TX_REPEAT; +        case PIPE_TEX_WRAP_CLAMP: +            return R300_TX_CLAMP; +        case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +            return R300_TX_CLAMP_TO_EDGE; +        case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +            return R300_TX_CLAMP_TO_BORDER; +        case PIPE_TEX_WRAP_MIRROR_REPEAT: +            return R300_TX_REPEAT | R300_TX_MIRRORED; +        case PIPE_TEX_WRAP_MIRROR_CLAMP: +            return R300_TX_CLAMP | R300_TX_MIRRORED; +        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +            return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; +        case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +            return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; +        default: +            debug_printf("r300: Unknown texture wrap %d", wrap); +            return 0; +    } +} + +static uint32_t translate_tex_filters(int min, int mag, int mip) { +    uint32_t retval = 0; +    switch (min) { +        case PIPE_TEX_FILTER_NEAREST: +            retval |= R300_TX_MIN_FILTER_NEAREST; +        case PIPE_TEX_FILTER_LINEAR: +            retval |= R300_TX_MIN_FILTER_LINEAR; +        case PIPE_TEX_FILTER_ANISO: +            retval |= R300_TX_MIN_FILTER_ANISO; +        default: +            debug_printf("r300: Unknown texture filter %d", min); +            break; +    } +    switch (mag) { +        case PIPE_TEX_FILTER_NEAREST: +            retval |= R300_TX_MAG_FILTER_NEAREST; +        case PIPE_TEX_FILTER_LINEAR: +            retval |= R300_TX_MAG_FILTER_LINEAR; +        case PIPE_TEX_FILTER_ANISO: +            retval |= 
R300_TX_MAG_FILTER_ANISO; +        default: +            debug_printf("r300: Unknown texture filter %d", mag); +            break; +    } +    switch (mip) { +        case PIPE_TEX_MIPFILTER_NONE: +            retval |= R300_TX_MIN_FILTER_MIP_NONE; +        case PIPE_TEX_MIPFILTER_NEAREST: +            retval |= R300_TX_MIN_FILTER_MIP_NEAREST; +        case PIPE_TEX_MIPFILTER_LINEAR: +            retval |= R300_TX_MIN_FILTER_MIP_LINEAR; +        default: +            debug_printf("r300: Unknown texture filter %d", mip); +            break; +    } + +    return retval; +} + +static uint32_t anisotropy(float max_aniso) { +    if (max_aniso >= 16.0f) { +        return R300_TX_MAX_ANISO_16_TO_1; +    } else if (max_aniso >= 8.0f) { +        return R300_TX_MAX_ANISO_8_TO_1; +    } else if (max_aniso >= 4.0f) { +        return R300_TX_MAX_ANISO_4_TO_1; +    } else if (max_aniso >= 2.0f) { +        return R300_TX_MAX_ANISO_2_TO_1; +    } else { +        return R300_TX_MAX_ANISO_1_TO_1; +    } +} + +static void* +        r300_create_sampler_state(struct pipe_context* pipe, +                                  const struct pipe_sampler_state* state) +{ +    struct r300_context* r300 = r300_context(pipe); +    struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); +    int lod_bias; + +    sampler->filter0 |= +        (translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | +        (translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) | +        (translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT); + +    sampler->filter0 |= translate_tex_filters(state->min_img_filter, +                                              state->mag_img_filter, +                                              state->min_mip_filter); + +    lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1); + +    sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; + +    sampler->filter1 |= anisotropy(state->max_anisotropy); + +    util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, +                    &sampler->border_color); + +    /* R500-specific fixups and optimizations */ +    if (r300_screen(r300->context.screen)->caps->is_r500) { +        sampler->filter1 |= R500_BORDER_FIX; +    } + +    return (void*)sampler; +} + +static void r300_bind_sampler_states(struct pipe_context* pipe, +                                     unsigned count, +                                     void** states) +{ +    struct r300_context* r300 = r300_context(pipe); +    int i; + +    if (count > 8) { +        return; +    } + +    for (i = 0; i < count; i++) { +        if (r300->sampler_states[i] != states[i]) { +            r300->sampler_states[i] = (struct r300_sampler_state*)states[i]; +            r300->dirty_state |= (R300_NEW_SAMPLER << i); +        } +    } + +    r300->sampler_count = count; +} + +static void r300_delete_sampler_state(struct pipe_context* pipe, void* state) +{ +    FREE(state); +} + +static void r300_set_sampler_textures(struct pipe_context* pipe, +                                      unsigned count, +                                      struct pipe_texture** texture) +{ +    struct r300_context* r300 = r300_context(pipe); +    int i; + +    /* XXX magic num */ +    if (count > 8) { +        return; +    } + +    for (i = 0; i < count; i++) { +        if (r300->textures[i] != (struct r300_texture*)texture[i]) { +            pipe_texture_reference((struct pipe_texture**)&r300->textures[i], +                texture[i]); +            r300->dirty_state |= (R300_NEW_TEXTURE << i); 
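            /* Each unit carries its own dirty bit (R300_NEW_TEXTURE << i,
             * and R300_NEW_SAMPLER << i above), so only the units whose
             * bindings actually changed need to be re-emitted. */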
+        } +    } + +    for (i = count; i < 8; i++) { +        if (r300->textures[i]) { +            pipe_texture_reference((struct pipe_texture**)&r300->textures[i], +                NULL); +            r300->dirty_state |= (R300_NEW_TEXTURE << i); +        } +    } + +    r300->texture_count = count; +} + +static void r300_set_scissor_state(struct pipe_context* pipe, +                                   const struct pipe_scissor_state* state) +{ +    struct r300_context* r300 = r300_context(pipe); +    draw_flush(r300->draw); + +    uint32_t left, top, right, bottom; + +    /* So, a bit of info. The scissors are offset by R300_SCISSORS_OFFSET in +     * both directions for all values, and can only be 13 bits wide. Why? +     * We may never know. */ +    left = (state->minx + R300_SCISSORS_OFFSET) & 0x1fff; +    top = (state->miny + R300_SCISSORS_OFFSET) & 0x1fff; +    right = (state->maxx + R300_SCISSORS_OFFSET) & 0x1fff; +    bottom = (state->maxy + R300_SCISSORS_OFFSET) & 0x1fff; + +    r300->scissor_state->scissor_top_left = (left << R300_SCISSORS_X_SHIFT) | +            (top << R300_SCISSORS_Y_SHIFT); +    r300->scissor_state->scissor_bottom_right = +        (right << R300_SCISSORS_X_SHIFT) | (bottom << R300_SCISSORS_Y_SHIFT); + +    r300->dirty_state |= R300_NEW_SCISSOR; +} + +static void r300_set_viewport_state(struct pipe_context* pipe, +                                    const struct pipe_viewport_state* state) +{ +    struct r300_context* r300 = r300_context(pipe); +    /* XXX handing this off to Draw for now */ +    draw_set_viewport_state(r300->draw, state); +} + +static void r300_set_vertex_buffers(struct pipe_context* pipe, +                                    unsigned count, +                                    const struct pipe_vertex_buffer* buffers) +{ +    struct r300_context* r300 = r300_context(pipe); +    /* XXX Draw */ +    draw_flush(r300->draw); +    draw_set_vertex_buffers(r300->draw, count, buffers); +} + +static void r300_set_vertex_elements(struct pipe_context* pipe, +                                    unsigned count, +                                    const struct pipe_vertex_element* elements) +{ +    struct r300_context* r300 = r300_context(pipe); +    /* XXX Draw */ +    draw_flush(r300->draw); +    draw_set_vertex_elements(r300->draw, count, elements); +} + +static void* r300_create_vs_state(struct pipe_context* pipe, +                                  const struct pipe_shader_state* state) +{ +    struct r300_context* context = r300_context(pipe); +    /* XXX handing this off to Draw for now */ +    return draw_create_vertex_shader(context->draw, state); +} + +static void r300_bind_vs_state(struct pipe_context* pipe, void* state) { +    struct r300_context* context = r300_context(pipe); +    /* XXX handing this off to Draw for now */ +    draw_bind_vertex_shader(context->draw, (struct draw_vertex_shader*)state); +} + +static void r300_delete_vs_state(struct pipe_context* pipe, void* state) +{ +    struct r300_context* context = r300_context(pipe); +    /* XXX handing this off to Draw for now */ +    draw_delete_vertex_shader(context->draw, (struct draw_vertex_shader*)state); +} + +void r300_init_state_functions(struct r300_context* r300) +{ +    r300->context.create_blend_state = r300_create_blend_state; +    r300->context.bind_blend_state = r300_bind_blend_state; +    r300->context.delete_blend_state = r300_delete_blend_state; + +    r300->context.set_blend_color = r300_set_blend_color; + +    r300->context.set_clip_state = r300_set_clip_state; + +  
  r300->context.set_constant_buffer = r300_set_constant_buffer; + +    r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state; +    r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state; +    r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state; + +    r300->context.set_edgeflags = r300_set_edgeflags; + +    r300->context.set_framebuffer_state = r300_set_framebuffer_state; + +    r300->context.create_fs_state = r300_create_fs_state; +    r300->context.bind_fs_state = r300_bind_fs_state; +    r300->context.delete_fs_state = r300_delete_fs_state; + +    r300->context.set_polygon_stipple = r300_set_polygon_stipple; + +    r300->context.create_rasterizer_state = r300_create_rs_state; +    r300->context.bind_rasterizer_state = r300_bind_rs_state; +    r300->context.delete_rasterizer_state = r300_delete_rs_state; + +    r300->context.create_sampler_state = r300_create_sampler_state; +    r300->context.bind_sampler_states = r300_bind_sampler_states; +    r300->context.delete_sampler_state = r300_delete_sampler_state; + +    r300->context.set_sampler_textures = r300_set_sampler_textures; + +    r300->context.set_scissor_state = r300_set_scissor_state; + +    r300->context.set_viewport_state = r300_set_viewport_state; + +    r300->context.set_vertex_buffers = r300_set_vertex_buffers; +    r300->context.set_vertex_elements = r300_set_vertex_elements; + +    r300->context.create_vs_state = r300_create_vs_state; +    r300->context.bind_vs_state = r300_bind_vs_state; +    r300->context.delete_vs_state = r300_delete_vs_state; +} diff --git a/src/gallium/drivers/r300/r300_state_shader.c b/src/gallium/drivers/r300/r300_state_shader.c new file mode 100644 index 0000000000..e87172128f --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_shader.c @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "r300_state_shader.h" + +void r300_translate_shader(struct r300_context* r300, +                           struct r300_fragment_shader* fs) +{ +} + +void r500_translate_shader(struct r300_context* r300, +                           struct r500_fragment_shader* fs) +{ +} diff --git a/src/gallium/drivers/r300/r300_state_shader.h b/src/gallium/drivers/r300/r300_state_shader.h new file mode 100644 index 0000000000..a20bd4276c --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_shader.h @@ -0,0 +1,35 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_STATE_SHADER_H +#define R300_STATE_SHADER_H + +#include "r300_context.h" +#include "r300_screen.h" + +void r300_translate_shader(struct r300_context* r300, +                           struct r300_fragment_shader* fs); + +void r500_translate_shader(struct r300_context* r300, +                           struct r500_fragment_shader* fs); + +#endif /* R300_STATE_SHADER_H */ diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c new file mode 100644 index 0000000000..1e1f96a7f9 --- /dev/null +++ b/src/gallium/drivers/r300/r300_surface.c @@ -0,0 +1,352 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "r300_surface.h" + +/* Provides pipe_context's "surface_fill". Commonly used for clearing + * buffers. */ +static void r300_surface_fill(struct pipe_context* pipe, +                              struct pipe_surface* dest, +                              unsigned x, unsigned y, +                              unsigned w, unsigned h, +                              unsigned color) +{ +    struct r300_context* r300 = r300_context(pipe); +    CS_LOCALS(r300); +    struct r300_capabilities* caps = ((struct r300_screen*)pipe->screen)->caps; +    struct r300_texture* tex = (struct r300_texture*)dest->texture; +    int i; +    float r, g, b, a; +    r = (float)((color >> 16) & 0xff) / 255.0f; +    g = (float)((color >>  8) & 0xff) / 255.0f; +    b = (float)((color >>  0) & 0xff) / 255.0f; +    debug_printf("r300: Filling surface %p at (%d,%d)," +        " dimensions %dx%d (stride %d), color 0x%x\n", +        dest, x, y, w, h, dest->stride, color); + +    /* Fallback? */ +    if (0) { +        debug_printf("r300: Falling back on surface clear..."); +        void* map = pipe->screen->surface_map(pipe->screen, dest, +            PIPE_BUFFER_USAGE_CPU_WRITE); +        pipe_fill_rect(map, &dest->block, &dest->stride, x, y, w, h, color); +        pipe->screen->surface_unmap(pipe->screen, dest); +        return; +    } + +BEGIN_CS((caps->is_r500) ? 309 : 280); +R300_PACIFY; +OUT_CS_REG(R300_TX_INVALTAGS, 0x0); +R300_PACIFY; +/* Flush PVS. */ +OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + +OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA | +    R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | +    R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | +    R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); +/* Vertex size. */ +OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); +/* Max and min vertex index clamp. */ +OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF); +OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); +/* XXX endian */ +OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP); +OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, 0x0); +/* XXX magic number not in r300_reg */ +OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); +OUT_CS_REG(R300_VAP_CLIP_CNTL, 0x0); +OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); +OUT_CS_32F(1.0); +OUT_CS_32F(1.0); +OUT_CS_32F(1.0); +OUT_CS_32F(1.0); +/* XXX is this too long? */ +OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xFFFF); +OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | +    R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE); +/* XXX more magic numbers */ +OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); +OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); +/* XXX why doesn't classic Mesa write the number of pipes, too? */ +OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16); +OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); +OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); +/* XXX point tex stuffing */ +OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1); +OUT_CS_32F(0.0); +OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1); +OUT_CS_32F(1.0); +OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | +    (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); +/* XXX should this be related to the actual point size? 
*/ +OUT_CS_REG(R300_GA_POINT_MINMAX, 0x6 | +    (0x1800 << R300_GA_POINT_MINMAX_MAX_SHIFT)); +/* XXX this big chunk should be refactored into rs_state */ +OUT_CS_REG(R300_GA_LINE_CNTL, 0x00030006); +OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, 0x3BAAAAAB); +OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, 0x00000000); +OUT_CS_REG(R300_GA_LINE_S0, 0x00000000); +OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000); +OUT_CS_REG(R300_GA_ENHANCE, 0x00000002); +OUT_CS_REG(R300_GA_COLOR_CONTROL, 0x0003AAAA); +OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); +OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); +OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000); +OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); +OUT_CS_REG(R300_GA_OFFSET, 0x00000000); +OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); +OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000); +OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000); +OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_SCALE, 0x00000000); +OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_OFFSET, 0x00000000); +OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_SCALE, 0x00000000); +OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_OFFSET, 0x00000000); +OUT_CS_REG(R300_SU_POLY_OFFSET_ENABLE, 0x00000000); +OUT_CS_REG(R300_SU_CULL_MODE, 0x00000000); +OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); +OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); +OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); +OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); +OUT_CS_REG(R300_SC_SCREENDOOR, 0x00FFFFFF); +OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000002); +OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000); +OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000); +OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000); +OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); +OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); +OUT_CS_REG(R300_RB3D_CCTL, 0x00000000); +OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); + +/* XXX: Oh the wonderful unknown */ +OUT_CS_REG_SEQ(0x4E54, 8); +for (i = 0; i < 8; i++) +    OUT_CS(0x00000000); +OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); +OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); +OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); +OUT_CS_REG(R300_ZB_FORMAT, 0x00000002); +OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003); +OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); +OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); +OUT_CS_REG(0x4F30, 0x00000000); +OUT_CS_REG(0x4F34, 0x00000000); +OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); +OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); +R300_PACIFY; +if (caps->has_tcl) { +    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, +        (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | +        ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | +            R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); +} else { +    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, +        (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) | +        ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | +            R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)); +} +OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000); +OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0xF688F688); +OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1); +OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405); +OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); +OUT_CS_REG(R300_VAP_VTX_SIZE, 0x00000008); +OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); +OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003); +OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000); +OUT_CS_REG(R300_TX_ENABLE, 0x0); +/* XXX viewport setup */ +OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); +OUT_CS_32F(1.0); +OUT_CS_32F((float)x); +OUT_CS_32F(1.0); +OUT_CS_32F((float)y); +OUT_CS_32F(1.0); +OUT_CS_32F(0.0); + +if 
(caps->has_tcl) { +    OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE | +        R300_PS_UCP_MODE_CLIP_AS_TRIFAN); +} + +OUT_CS_REG(R300_GA_POINT_SIZE, ((h * 6) & R300_POINTSIZE_Y_MASK) | +    ((w * 6) << R300_POINTSIZE_X_SHIFT)); + +/* XXX RS block and fp setup */ +if (caps->is_r500) { +    OUT_CS_REG_SEQ(R500_RS_IP_0, 8); +    for (i = 0; i < 8; i++) { +        /* I like the operator macros more than the shift macros... */ +        OUT_CS((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | +            (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | +            (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | +            (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); +    } +    /* XXX */ +    OUT_CS_REG_SEQ(R300_RS_COUNT, 2); +    OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); +    OUT_CS(0x0); +    OUT_CS_REG(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); + +    OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); +    OUT_CS_REG(R500_US_PIXSIZE, 0x00000000); +    OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) | +        R500_US_CODE_END_ADDR(1)); +    OUT_CS_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | +        R500_US_CODE_RANGE_SIZE(1)); +    OUT_CS_REG(R500_US_CODE_OFFSET, R500_US_CODE_OFFSET_ADDR(0)); +    R300_PACIFY; +    OUT_CS_REG(R500_GA_US_VECTOR_INDEX, +        0 | R500_GA_US_VECTOR_INDEX_TYPE_INSTR); +    OUT_CS_REG(R500_GA_US_VECTOR_DATA, +        R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT | R500_INST_LAST | +        R500_INST_RGB_OMASK_R | R500_INST_RGB_OMASK_G | R500_INST_RGB_OMASK_B | +        R500_INST_ALPHA_OMASK | R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP); +    OUT_CS_REG(R500_GA_US_VECTOR_DATA, +        R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST | +        R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST); +    OUT_CS_REG(R500_GA_US_VECTOR_DATA, +        R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST | +        R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST); +    OUT_CS_REG(R500_GA_US_VECTOR_DATA, +        R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R | +        R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B | +        R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R | +        R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B); +    OUT_CS_REG(R500_GA_US_VECTOR_DATA, +        R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A); +    OUT_CS_REG(R500_GA_US_VECTOR_DATA, +        R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 | +        R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 | +        R500_ALU_RGBA_A_SWIZ_0); +} else { +    OUT_CS_REG_SEQ(R300_RS_IP_0, 8); +    for (i = 0; i < 8; i++) { +        OUT_CS(R300_RS_SEL_T(R300_RS_SEL_K0) | +            R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1)); +    } +    /* XXX */ +    OUT_CS_REG_SEQ(R300_RS_COUNT, 2); +    OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); +    OUT_CS(1); +    OUT_CS_REG(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); + +    /* XXX magic numbers */ +    OUT_CS_REG(R300_US_CONFIG, 0); +    OUT_CS_REG(R300_US_PIXSIZE, 2); +    OUT_CS_REG(R300_US_CODE_OFFSET, 0x0); +    OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0); +    OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0); +    OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0); +    OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000); +    OUT_CS_REG(R300_US_ALU_RGB_INST_0, 0x50A80); +    OUT_CS_REG(R300_US_ALU_RGB_ADDR_0, 0x1C000000); +    OUT_CS_REG(R300_US_ALU_ALPHA_INST_0, 0x40889); +    OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0, 0x1000000); +    OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 
4); +    OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A); +    OUT_CS(R300_US_OUT_FMT_UNUSED); +    OUT_CS(R300_US_OUT_FMT_UNUSED); +    OUT_CS(R300_US_OUT_FMT_UNUSED); +    OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0); +} +/* XXX these magic numbers should be explained when + * this becomes a cached state object */ +if (caps->has_tcl) { +    OUT_CS_REG(R300_VAP_CNTL, 0xA | +        (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) | +        (0xB << R300_VF_MAX_VTX_NUM_SHIFT) | +        (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT)); +    OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, 0x00100000); +    OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, 0x00000000); +    OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, 0x00000001); +    R300_PACIFY; +    /* XXX translate these back into normal instructions */ +    OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x1); +    OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0x0); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xF00203); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xD10001); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x1248001); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xF02203); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0xD10021); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x1248021); +    OUT_CS_REG(R300_VAP_PVS_UPLOAD_DATA, 0x0); +} else { +    OUT_CS_REG(R300_VAP_CNTL, 0xA | +        (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) | +        (0x5 << R300_VF_MAX_VTX_NUM_SHIFT) | +        (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT)); +} +R300_PACIFY; +END_CS; + +r300_emit_blend_state(r300, &blend_clear_state); +r300_emit_blend_color_state(r300, &blend_color_clear_state); +r300_emit_dsa_state(r300, &dsa_clear_state); + +BEGIN_CS(36); +R300_PACIFY; +/* Flush colorbuffer and blend caches. */ +OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, +    R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | +    R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); +OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, +    R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | +    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + +OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); +OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); +/* XXX this should not be so rigid and it still doesn't work right */ +OUT_CS_REG(R300_RB3D_COLORPITCH0, (dest->stride >> 2) | R300_COLOR_FORMAT_ARGB8888); +OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); +/* XXX Packet3 */ +OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); +OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | +(1 << R300_PRIM_NUM_VERTICES_SHIFT)); +OUT_CS_32F(w / 2.0); +OUT_CS_32F(h / 2.0); +/* XXX this should be the depth value to clear to */ +OUT_CS_32F(1.0); +OUT_CS_32F(1.0); +OUT_CS_32F(r); +OUT_CS_32F(g); +OUT_CS_32F(b); +OUT_CS_32F(1.0); + +/* XXX figure out why this is 0xA and not 0x2 */ +OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA); +/* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, +    R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | +    R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */ +R300_PACIFY; + +END_CS; +FLUSH_CS; + +    r300->dirty_state = R300_NEW_KITCHEN_SINK; +} + +void r300_init_surface_functions(struct r300_context* r300) +{ +    r300->context.surface_fill = r300_surface_fill; +} diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h new file mode 100644 index 0000000000..e1d53116a1 --- /dev/null +++ b/src/gallium/drivers/r300/r300_surface.h @@ -0,0 +1,58 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of 
this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SURFACE_H +#define R300_SURFACE_H + +#include "pipe/p_context.h" +#include "pipe/p_screen.h" + +#include "util/u_rect.h" + +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_emit.h" + +const struct r300_blend_state blend_clear_state = { +    .blend_control = 0x0, +    .alpha_blend_control = 0x0, +    .rop = 0x0, +    .dither = 0x0, +}; + +const struct r300_blend_color_state blend_color_clear_state = { +    .blend_color = 0x0, +    .blend_color_red_alpha = 0x0, +    .blend_color_green_blue = 0x0, +}; + +const struct r300_dsa_state dsa_clear_state = { +    .alpha_function = 0x0, +    .alpha_reference = 0x0, +    .z_buffer_control = 0x0, +    .z_stencil_control = 0x0, +    .stencil_ref_mask = R300_STENCILWRITEMASK_MASK, +    .z_buffer_top = R300_ZTOP_ENABLE, +    .stencil_ref_bf = 0x0, +}; + +#endif /* R300_SURFACE_H */ diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c new file mode 100644 index 0000000000..f6e98d23e9 --- /dev/null +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -0,0 +1,129 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "draw/draw_pipe.h" +#include "util/u_memory.h" + +#include "r300_cs.h" +#include "r300_context.h" +#include "r300_reg.h" + +/* r300_swtcl_emit: Primitive vertex emission using an immediate + * vertex buffer and no HW TCL. */ + +struct swtcl_stage { +    /* Parent class */ +    struct draw_stage draw; + +    struct r300_context* r300; +}; + +static INLINE struct swtcl_stage* swtcl_stage(struct draw_stage* draw) { +    return (struct swtcl_stage*)draw; +} + +static void r300_emit_vertex(struct r300_context* r300, +                             const struct vertex_header* vertex) +{ +    /* XXX */ +} + +static INLINE void r300_emit_prim(struct draw_stage* draw, +                                  struct prim_header* prim, +                                  unsigned hwprim, +                                  unsigned count) +{ +    struct r300_context* r300 = swtcl_stage(draw)->r300; +    CS_LOCALS(r300); +    int i; + +    r300_emit_dirty_state(r300); + +    /* XXX should be count * vtx size */ +    BEGIN_CS(2 + count + 6); +    OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, count)); +    OUT_CS(hwprim | R300_PRIM_WALK_RING | +        (count << R300_PRIM_NUM_VERTICES_SHIFT)); + +    for (i = 0; i < count; i++) { +        r300_emit_vertex(r300, prim->v[i]); +    } +    R300_PACIFY; +    END_CS; +} + +/* Just as an aside... + * + * Radeons can do many more primitives: + * - Line strip + * - Triangle fan + * - Triangle strip + * - Line loop + * - Quads + * - Quad strip + * - Polygons + * + * The following were just the only ones in Draw. */ + +static void r300_emit_point(struct draw_stage* draw, struct prim_header* prim) +{ +    r300_emit_prim(draw, prim, R300_PRIM_TYPE_POINT, 1); +} + +static void r300_emit_line(struct draw_stage* draw, struct prim_header* prim) +{ +    r300_emit_prim(draw, prim, R300_PRIM_TYPE_LINE, 2); +} + +static void r300_emit_tri(struct draw_stage* draw, struct prim_header* prim) +{ +    r300_emit_prim(draw, prim, R300_PRIM_TYPE_TRI_LIST, 3); +} + +static void r300_swtcl_flush(struct draw_stage* draw, unsigned flags) +{ +} + +static void r300_reset_stipple(struct draw_stage* draw) +{ +    /* XXX */ +} + +static void r300_swtcl_destroy(struct draw_stage* draw) +{ +    FREE(draw); +} + +struct draw_stage* r300_draw_swtcl_stage(struct r300_context* r300) +{ +    struct swtcl_stage* swtcl = CALLOC_STRUCT(swtcl_stage); + +    swtcl->r300 = r300; +    swtcl->draw.point = r300_emit_point; +    swtcl->draw.line = r300_emit_line; +    swtcl->draw.tri = r300_emit_tri; +    swtcl->draw.flush = r300_swtcl_flush; +    swtcl->draw.reset_stipple_counter = r300_reset_stipple; +    swtcl->draw.destroy = r300_swtcl_destroy; + +    return &swtcl->draw; +} diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c new file mode 100644 index 0000000000..ff812c09f8 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture.c @@ -0,0 +1,193 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * 
paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_texture.h" + +static int minify(int i) +{ +    return MAX2(1, i >> 1); +} + +static void r300_setup_miptree(struct r300_texture* tex) +{ +    struct pipe_texture* base = &tex->tex; +    int stride, size, offset; + +    for (int i = 0; i <= base->last_level; i++) { +        if (i > 0) { +            base->width[i] = minify(base->width[i-1]); +            base->height[i] = minify(base->height[i-1]); +            base->depth[i] = minify(base->depth[i-1]); +        } + +        base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]); +        base->nblocksy[i] = pf_get_nblocksy(&base->block, base->width[i]); + +        /* Radeons enjoy things in multiples of 32. */ +        /* XXX NPOT -> 64, not 32 */ +        stride = (base->nblocksx[i] * base->block.size + 63) & ~63; +        size = stride * base->nblocksy[i] * base->depth[i]; + +        /* XXX 64 for NPOT */ +        tex->offset[i] = (tex->size + 63) & ~63; +        tex->size = tex->offset[i] + size; +    } +} + +/* Create a new texture. */ +static struct pipe_texture* +    r300_texture_create(struct pipe_screen* screen, +                        const struct pipe_texture* template) +{ +    /* XXX struct r300_screen* r300screen = r300_screen(screen); */ + +    struct r300_texture* tex = CALLOC_STRUCT(r300_texture); + +    if (!tex) { +        return NULL; +    } + +    tex->tex = *template; +    tex->tex.refcount = 1; +    tex->tex.screen = screen; + +    r300_setup_miptree(tex); + +    tex->buffer = screen->buffer_create(screen, 63, +                                        PIPE_BUFFER_USAGE_PIXEL, +                                        tex->size); + +    if (!tex->buffer) { +        FREE(tex); +        return NULL; +    } + +    return (struct pipe_texture*)tex; +} + +static void r300_texture_release(struct pipe_screen* screen, +                                 struct pipe_texture** texture) +{ +    if (!*texture) { +        return; +    } + +    (*texture)->refcount--; + +    if ((*texture)->refcount <= 0) { +        struct r300_texture* tex = (struct r300_texture*)*texture; + +        pipe_buffer_reference(screen, &tex->buffer, NULL); + +        FREE(tex); +    } + +    *texture = NULL; +} + +static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, +                                                 struct pipe_texture* texture, +                                                 unsigned face, +                                                 unsigned level, +                                                 unsigned zslice, +                                                 unsigned flags) +{ +    struct r300_texture* tex = (struct r300_texture*)texture; +    struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface); +    unsigned offset; + +    /* XXX this is certainly dependent on tex target */ +    offset = tex->offset[level]; + +    if (surface) { +        surface->refcount = 1; +        
pipe_texture_reference(&surface->texture, texture); +        surface->format = texture->format; +        surface->width = texture->width[level]; +        surface->height = texture->height[level]; +        surface->block = texture->block; +        surface->nblocksx = texture->nblocksx[level]; +        surface->nblocksy = texture->nblocksy[level]; +        /* XXX save the actual stride instead plz kthnxbai */ +        surface->stride = +            (texture->nblocksx[level] * texture->block.size + 63) & ~63; +        surface->offset = offset; +        surface->usage = flags; +        surface->status = PIPE_SURFACE_STATUS_DEFINED; +    } + +    return surface; +} + +static void r300_tex_surface_release(struct pipe_screen* screen, +                                     struct pipe_surface** surface) +{ +    struct pipe_surface* s = *surface; + +    s->refcount--; + +    if (s->refcount <= 0) { +        pipe_texture_reference(&s->texture, NULL); +        FREE(s); +    } + +    *surface = NULL; +} + +static struct pipe_texture* +    r300_texture_blanket(struct pipe_screen* screen, +                         const struct pipe_texture* base, +                         const unsigned* stride, +                         struct pipe_buffer* buffer) +{ +    struct r300_texture* tex; + +    if (base->target != PIPE_TEXTURE_2D || +        base->last_level != 0 || +        base->depth[0] != 1) { +        return NULL; +    } + +    tex = CALLOC_STRUCT(r300_texture); +    if (!tex) { +        return NULL; +    } + +    tex->tex = *base; +    tex->tex.refcount = 1; +    tex->tex.screen = screen; + +    /* XXX tex->stride = *stride; */ + +    pipe_buffer_reference(screen, &tex->buffer, buffer); + +    return (struct pipe_texture*)tex; +} + +void r300_init_screen_texture_functions(struct pipe_screen* screen) +{ +    screen->texture_create = r300_texture_create; +    screen->texture_release = r300_texture_release; +    screen->get_tex_surface = r300_get_tex_surface; +    screen->tex_surface_release = r300_tex_surface_release; +    screen->texture_blanket = r300_texture_blanket; +} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h new file mode 100644 index 0000000000..7964229a94 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture.h @@ -0,0 +1,34 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef R300_TEXTURE_H +#define R300_TEXTURE_H + +#include "pipe/p_screen.h" + +#include "util/u_math.h" + +#include "r300_context.h" + +void r300_init_screen_texture_functions(struct pipe_screen* screen); + +#endif /* R300_TEXTURE_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h new file mode 100644 index 0000000000..5a3a212892 --- /dev/null +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -0,0 +1,94 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_WINSYS_H +#define R300_WINSYS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* The public interface header for the r300 pipe driver. + * Any winsys hosting this pipe needs to implement r300_winsys and then + * call r300_create_context to start things. */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +struct radeon_cs; + +struct r300_winsys { + +    /* PCI ID */ +    uint32_t pci_id; + +    /* GB pipe count */ +    uint32_t gb_pipes; + +    /* CS object. This is very much like Intel's batchbuffer. +     * Fill it full of dwords and relocs and then submit. +     * Repeat as needed. */ +    /* Note: Unlike Mesa's version of this, we don't keep a copy of the CSM +     * that was used to create this CS. Is this a good idea? */ +    /* Note: The pipe driver doesn't know how to use this. This is purely +     * for the winsys. */ +    struct radeon_cs* cs; + +    /* Check to see if there's room for commands. */ +    boolean (*check_cs)(struct radeon_cs* cs, int size); + +    /* Start a command emit. */ +    void (*begin_cs)(struct radeon_cs* cs, +           int size, +           const char* file, +           const char* function, +           int line); + +    /* Write a dword to the command buffer. */ +    void (*write_cs_dword)(struct radeon_cs* cs, uint32_t dword); + +    /* Write a relocated dword to the command buffer. */ +    void (*write_cs_reloc)(struct radeon_cs* cs, +           struct pipe_buffer* bo, +           uint32_t rd, +           uint32_t wd, +           uint32_t flags); + +    /* Finish a command emit. */ +    void (*end_cs)(struct radeon_cs* cs, +           const char* file, +           const char* function, +           int line); + +    /* Flush the CS. 
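     * Submits everything written since the last flush to the kernel and
     * leaves the buffer empty for reuse.
     *
     * As a rough editorial sketch (not part of this header) of how the
     * hooks above are meant to be driven, where 'ws' is the r300_winsys
     * instance and all other names and counts are placeholders:
     *
     *    if (!ws->check_cs(ws->cs, 3))
     *       ws->flush_cs(ws->cs);                         // make room first
     *    ws->begin_cs(ws->cs, 3, __FILE__, __FUNCTION__, __LINE__);
     *    ws->write_cs_dword(ws->cs, header_dword);        // e.g. a register write header
     *    ws->write_cs_dword(ws->cs, value_dword);         // its payload
     *    ws->write_cs_reloc(ws->cs, some_bo, rd, wd, 0);  // relocated buffer address
     *    ws->end_cs(ws->cs, __FILE__, __FUNCTION__, __LINE__);
     *    // ...repeat, then flush once the buffer fills up or the frame ends.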
*/ +    void (*flush_cs)(struct radeon_cs* cs); +}; + +struct pipe_context* r300_create_context(struct pipe_screen* screen, +                                         struct pipe_winsys* winsys, +                                         struct r300_winsys* r300_winsys); + +#ifdef __cplusplus +} +#endif + +#endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile new file mode 100644 index 0000000000..120bdfd9dd --- /dev/null +++ b/src/gallium/drivers/softpipe/Makefile @@ -0,0 +1,47 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = softpipe + +C_SOURCES = \ +	sp_fs_exec.c \ +	sp_fs_sse.c \ +	sp_fs_llvm.c \ +	sp_clear.c \ +	sp_flush.c \ +	sp_query.c \ +	sp_context.c \ +	sp_draw_arrays.c \ +	sp_prim_setup.c \ +	sp_prim_vbuf.c \ +	sp_quad.c \ +	sp_quad_alpha_test.c \ +	sp_quad_blend.c \ +	sp_quad_colormask.c \ +	sp_quad_coverage.c \ +	sp_quad_depth_test.c \ +	sp_quad_earlyz.c \ +	sp_quad_fs.c \ +	sp_quad_occlusion.c \ +	sp_quad_output.c \ +	sp_quad_stencil.c \ +	sp_quad_stipple.c \ +	sp_screen.c \ +        sp_setup.c \ +	sp_state_blend.c \ +	sp_state_clip.c \ +	sp_state_derived.c \ +	sp_state_fs.c \ +	sp_state_sampler.c \ +	sp_state_rasterizer.c \ +	sp_state_surface.c \ +	sp_state_vertex.c \ +	sp_texture.c \ +	sp_tex_sample.c \ +	sp_tile_cache.c \ +	sp_surface.c  + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript new file mode 100644 index 0000000000..c1f7daa8ab --- /dev/null +++ b/src/gallium/drivers/softpipe/SConscript @@ -0,0 +1,46 @@ +Import('*') + +env = env.Clone() + +softpipe = env.ConvenienceLibrary( +	target = 'softpipe', +	source = [ +		'sp_fs_exec.c', +		'sp_fs_sse.c', +		'sp_fs_llvm.c', +		'sp_clear.c', +		'sp_context.c', +		'sp_draw_arrays.c', +		'sp_flush.c', +		'sp_prim_setup.c', +		'sp_prim_vbuf.c', +		'sp_setup.c', +		'sp_quad_alpha_test.c', +		'sp_quad_blend.c', +		'sp_quad.c', +		'sp_quad_colormask.c', +		'sp_quad_coverage.c', +		'sp_quad_depth_test.c', +		'sp_quad_earlyz.c', +		'sp_quad_fs.c', +		'sp_quad_occlusion.c', +		'sp_quad_output.c', +		'sp_quad_stencil.c', +		'sp_quad_stipple.c', +		'sp_query.c', +		'sp_screen.c', +		'sp_state_blend.c', +		'sp_state_clip.c', +		'sp_state_derived.c', +		'sp_state_fs.c', +		'sp_state_rasterizer.c', +		'sp_state_sampler.c', +		'sp_state_surface.c', +		'sp_state_vertex.c', +		'sp_surface.c', +		'sp_tex_sample.c', +		'sp_texture.c', +		'sp_tile_cache.c', +	]) + +Export('softpipe')
\ No newline at end of file diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c new file mode 100644 index 0000000000..ad108ec446 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -0,0 +1,107 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Author: + *    Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "util/u_pack_color.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" + + +/** + * Convert packed pixel from one format to another. + */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, +              enum pipe_format dstFormat) +{ +   ubyte r, g, b, a; +   unsigned dstColor; + +   util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); +   util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + +   return dstColor; +} + + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + * Note: when clearing a color buffer, the clearValue is always + * encoded as PIPE_FORMAT_A8R8G8B8_UNORM. + */ +void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, +               unsigned clearValue) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); +   uint i; + +   if (softpipe->no_rast) +      return; + +#if 0 +   softpipe_update_derived(softpipe); /* not needed?? 
*/ +#endif + +   if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) { +      sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue); +      softpipe->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_CLEAR; +#if TILE_CLEAR_OPTIMIZATION +      return; +#endif +   } + +   for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { +      if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { +         unsigned cv; +         if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { +            cv = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, +                               ps->format); +         } +         else { +            cv = clearValue; +         } +         sp_tile_cache_clear(softpipe->cbuf_cache[i], cv); +         softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR; +      } +   } + +#if !TILE_CLEAR_OPTIMIZATION +   /* non-cached surface */ +   pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +#endif +} diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h new file mode 100644 index 0000000000..a8ed1c4ecc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Author: + *    Brian Paul + */ + +#ifndef SP_CLEAR_H +#define SP_CLEAR_H + +#include "pipe/p_state.h" +struct pipe_context; + +extern void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, +               unsigned clearValue); + + +#endif /* SP_CLEAR_H */ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c new file mode 100644 index 0000000000..c2d882a819 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -0,0 +1,288 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc.  All rights reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Author: + *    Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ +   unsigned i; + +   for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { +      sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); +   } + +   sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ +   uint i; + +   for (i = 0; i < sp->framebuffer.nr_cbufs; i++) +      sp_flush_tile_cache(sp, sp->cbuf_cache[i]); +   sp_flush_tile_cache(sp, sp->zsbuf_cache); + +   for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { +      sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); +   } +   sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ +   struct softpipe_context *softpipe = softpipe_context( pipe ); +   struct pipe_screen *screen = pipe->screen; +   uint i; + +   if (softpipe->draw) +      draw_destroy( softpipe->draw ); + +   for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { +      softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); +      softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); +      softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); +      softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); +      softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); +      softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test ); +      softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); +      softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); +      softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); +      softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); +      
softpipe->quad[i].output->destroy( softpipe->quad[i].output ); +   } + +   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) +      sp_destroy_tile_cache(softpipe->cbuf_cache[i]); +   sp_destroy_tile_cache(softpipe->zsbuf_cache); + +   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) +      sp_destroy_tile_cache(softpipe->tex_cache[i]); + +   for (i = 0; i < Elements(softpipe->constants); i++) { +      if (softpipe->constants[i].buffer) { +         pipe_buffer_reference(screen, &softpipe->constants[i].buffer, NULL); +      } +   } + +   FREE( softpipe ); +} + + +struct pipe_context * +softpipe_create( struct pipe_screen *screen, +                 struct pipe_winsys *pipe_winsys, +                 void *unused ) +{ +   struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); +   uint i; + +   util_init_math(); + +#ifdef PIPE_ARCH_X86 +   softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); +#else +   softpipe->use_sse = FALSE; +#endif + +   softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + +   softpipe->pipe.winsys = pipe_winsys; +   softpipe->pipe.screen = screen; +   softpipe->pipe.destroy = softpipe_destroy; + +   /* state setters */ +   softpipe->pipe.create_blend_state = softpipe_create_blend_state; +   softpipe->pipe.bind_blend_state   = softpipe_bind_blend_state; +   softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + +   softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; +   softpipe->pipe.bind_sampler_states  = softpipe_bind_sampler_states; +   softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + +   softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; +   softpipe->pipe.bind_depth_stencil_alpha_state   = softpipe_bind_depth_stencil_state; +   softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + +   softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; +   softpipe->pipe.bind_rasterizer_state   = softpipe_bind_rasterizer_state; +   softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + +   softpipe->pipe.create_fs_state = softpipe_create_fs_state; +   softpipe->pipe.bind_fs_state   = softpipe_bind_fs_state; +   softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + +   softpipe->pipe.create_vs_state = softpipe_create_vs_state; +   softpipe->pipe.bind_vs_state   = softpipe_bind_vs_state; +   softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + +   softpipe->pipe.set_blend_color = softpipe_set_blend_color; +   softpipe->pipe.set_clip_state = softpipe_set_clip_state; +   softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; +   softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; +   softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; +   softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; +   softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; +   softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + +   softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; +   softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements; + +   softpipe->pipe.draw_arrays = softpipe_draw_arrays; +   softpipe->pipe.draw_elements = softpipe_draw_elements; +   softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; +   softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; + + +   softpipe->pipe.clear = softpipe_clear; +   softpipe->pipe.flush = softpipe_flush; + +   
softpipe_init_query_funcs( softpipe ); +   softpipe_init_texture_funcs( softpipe ); + +   /* +    * Alloc caches for accessing drawing surfaces and textures. +    * Must be before quad stage setup! +    */ +   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) +      softpipe->cbuf_cache[i] = sp_create_tile_cache( screen ); +   softpipe->zsbuf_cache = sp_create_tile_cache( screen ); + +   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) +      softpipe->tex_cache[i] = sp_create_tile_cache( screen ); + + +   /* setup quad rendering stages */ +   for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { +      softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); +      softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); +      softpipe->quad[i].shade = sp_quad_shade_stage(softpipe); +      softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); +      softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); +      softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); +      softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); +      softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); +      softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); +      softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); +      softpipe->quad[i].output = sp_quad_output_stage(softpipe); +   } + +   /* vertex shader samplers */ +   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { +      softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex; +      softpipe->tgsi.vert_samplers[i].unit = i; +      softpipe->tgsi.vert_samplers[i].sp = softpipe; +      softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i]; +      softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i]; +   } + +   /* fragment shader samplers */ +   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { +      softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; +      softpipe->tgsi.frag_samplers[i].unit = i; +      softpipe->tgsi.frag_samplers[i].sp = softpipe; +      softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i]; +      softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i]; +   } + +   /* +    * Create drawing context and plug our rendering stage into it. +    */ +   softpipe->draw = draw_create(); +   if (!softpipe->draw)  +      goto fail; + +   draw_texture_samplers(softpipe->draw, +                         PIPE_MAX_SAMPLERS, +                         (struct tgsi_sampler **) +                            softpipe->tgsi.vert_samplers_list); + +   softpipe->setup = sp_draw_render_stage(softpipe); +   if (!softpipe->setup) +      goto fail; + +   if (debug_get_bool_option( "SP_NO_RAST", FALSE )) +      softpipe->no_rast = TRUE; + +   if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) { +      /* Deprecated path -- vbuf is the intended interface to the draw module: +       */ +      draw_set_rasterize_stage(softpipe->draw, softpipe->setup); +   } +   else { +      sp_init_vbuf(softpipe); +   } + +   /* plug in AA line/point stages */ +   draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); +   draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + +#if USE_DRAW_STAGE_PSTIPPLE +   /* Do polygon stipple w/ texture map + frag prog? 
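    *
    * USE_DRAW_STAGE_PSTIPPLE is defined in sp_context.h; when it is zero,
    * stippling is handled by the sp_quad_stipple.c quad stage instead.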
*/ +   draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + +   sp_init_surface_functions(softpipe); + +   return &softpipe->pipe; + + fail: +   softpipe_destroy(&softpipe->pipe); +   return NULL; +} + diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h new file mode 100644 index 0000000000..e2451c6ecb --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -0,0 +1,174 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_CONTEXT_H +#define SP_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +#include "draw/draw_vertex.h" + +#include "sp_quad.h" +#include "sp_tex_sample.h" + + +/** + * This is a temporary variable for testing draw-stage polygon stipple. + * If zero, do stipple in sp_quad_stipple.c + */ +#define USE_DRAW_STAGE_PSTIPPLE 1 + +/* Number of threads working on individual quads. + * Setting to 1 disables this feature. 
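 *
 * Each potential thread owns a full copy of the quad-stage pipeline (the
 * quad[SP_NUM_QUAD_THREADS] array below), so dispatch is roughly as in the
 * sketch here, assuming the quad_stage run() hook used elsewhere in this
 * patch ("thread" and "quad" are placeholders):
 *
 *    struct quad_stage *first = softpipe->quad[thread].first;
 *    first->run(first, quad);
 *
 * With the default value of 1 only quad[0] is ever exercised.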
+ */ +#define SP_NUM_QUAD_THREADS 1 + +struct softpipe_winsys; +struct softpipe_vbuf_render; +struct draw_context; +struct draw_stage; +struct softpipe_tile_cache; +struct sp_fragment_shader; +struct sp_vertex_shader; + + +struct softpipe_context { +   struct pipe_context pipe;  /**< base class */ + +   /* The most recent drawing state as set by the driver: +    */ +   const struct pipe_blend_state   *blend; +   const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; +   const struct pipe_depth_stencil_alpha_state   *depth_stencil; +   const struct pipe_rasterizer_state *rasterizer; +   const struct sp_fragment_shader *fs; +   const struct sp_vertex_shader *vs; + +   struct pipe_blend_color blend_color; +   struct pipe_clip_state clip; +   struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; +   struct pipe_framebuffer_state framebuffer; +   struct pipe_poly_stipple poly_stipple; +   struct pipe_scissor_state scissor; +   struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; +   struct pipe_viewport_state viewport; +   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; +   struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; +   unsigned dirty; + +   unsigned num_samplers; +   unsigned num_textures; +   unsigned num_vertex_elements; +   unsigned num_vertex_buffers; + +   boolean no_rast; + +   /* Counter for occlusion queries.  Note this supports overlapping +    * queries. +    */ +   uint64_t occlusion_count; + +   /* +    * Mapped vertex buffers +    */ +   ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; +    +   /** Mapped constant buffers */ +   void *mapped_constants[PIPE_SHADER_TYPES]; + +   /** Vertex format */ +   struct vertex_info vertex_info; +   struct vertex_info vertex_info_vbuf; + +   int psize_slot; + +   unsigned reduced_api_prim;  /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ + +#if 0 +   /* Stipple derived state: +    */ +   ubyte stipple_masks[16][16]; +#endif + +   /** Derived from scissor and surface bounds: */ +   struct pipe_scissor_state cliprect; + +   unsigned line_stipple_counter; + +   /** Software quad rendering pipeline */ +   struct { +      struct quad_stage *polygon_stipple; +      struct quad_stage *earlyz; +      struct quad_stage *shade; +      struct quad_stage *alpha_test; +      struct quad_stage *stencil_test; +      struct quad_stage *depth_test; +      struct quad_stage *occlusion; +      struct quad_stage *coverage; +      struct quad_stage *blend; +      struct quad_stage *colormask; +      struct quad_stage *output; + +      struct quad_stage *first; /**< points to one of the above stages */ +   } quad[SP_NUM_QUAD_THREADS]; + +   /** TGSI exec things */ +   struct { +      struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; +      struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; +      struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; +      struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; +   } tgsi; + +   /** The primitive drawing context */ +   struct draw_context *draw; +   struct draw_stage *setup; +   struct draw_stage *vbuf; +   struct softpipe_vbuf_render *vbuf_render; + +   struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; +   struct softpipe_tile_cache *zsbuf_cache; + +   struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + +   int use_sse : 1; +   int dump_fs : 1; +}; + + +static INLINE struct softpipe_context * +softpipe_context( struct pipe_context *pipe ) +{ +   return (struct softpipe_context *)pipe; +} + +#endif /* SP_CONTEXT_H */ + diff --git 
a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c new file mode 100644 index 0000000000..7e3a25e34b --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -0,0 +1,208 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Author: + *    Brian Paul + *    Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" + +#include "draw/draw_context.h" + + + +static void +softpipe_map_constant_buffers(struct softpipe_context *sp) +{ +   struct pipe_winsys *ws = sp->pipe.winsys; +   uint i, size; + +   for (i = 0; i < PIPE_SHADER_TYPES; i++) { +      if (sp->constants[i].buffer && sp->constants[i].buffer->size) +         sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, +                                                  PIPE_BUFFER_USAGE_CPU_READ); +   } + +   if (sp->constants[PIPE_SHADER_VERTEX].buffer) +      size = sp->constants[PIPE_SHADER_VERTEX].buffer->size; +   else +      size = 0; + +   draw_set_mapped_constant_buffer(sp->draw, +                                   sp->mapped_constants[PIPE_SHADER_VERTEX], +                                   size); +} + +static void +softpipe_unmap_constant_buffers(struct softpipe_context *sp) +{ +   struct pipe_winsys *ws = sp->pipe.winsys; +   uint i; + +   /* really need to flush all prims since the vert/frag shaders const buffers +    * are going away now. 
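    *
    * (draw_flush() below pushes any queued primitives through while the
    * mappings made in softpipe_map_constant_buffers() are still valid;
    * only then are the winsys buffers unmapped.)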
+    */ +   draw_flush(sp->draw); + +   draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + +   for (i = 0; i < 2; i++) { +      if (sp->constants[i].buffer && sp->constants[i].buffer->size) +         ws->buffer_unmap(ws, sp->constants[i].buffer); +      sp->mapped_constants[i] = NULL; +   } +} + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { +   PIPE_PRIM_POINTS, +   PIPE_PRIM_LINES, +   PIPE_PRIM_LINES, +   PIPE_PRIM_LINES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES, +   PIPE_PRIM_TRIANGLES +}; + + +boolean +softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +                     unsigned start, unsigned count) +{ +   return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? + */ + +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, +                             struct pipe_buffer *indexBuffer, +                             unsigned indexSize, +                             unsigned min_index, +                             unsigned max_index, +                             unsigned mode, unsigned start, unsigned count) +{ +   struct softpipe_context *sp = softpipe_context(pipe); +   struct draw_context *draw = sp->draw; +   unsigned i; + +   sp->reduced_api_prim = reduced_prim[mode]; + +   if (sp->dirty) +      softpipe_update_derived( sp ); + +   softpipe_map_surfaces(sp); +   softpipe_map_constant_buffers(sp); + +   /* +    * Map vertex buffers +    */ +   for (i = 0; i < sp->num_vertex_buffers; i++) { +      void *buf +         = pipe_buffer_map(pipe->screen, +                                    sp->vertex_buffer[i].buffer, +                                    PIPE_BUFFER_USAGE_CPU_READ); +      draw_set_mapped_vertex_buffer(draw, i, buf); +   } +   /* Map index buffer, if present */ +   if (indexBuffer) { +      void *mapped_indexes +         = pipe_buffer_map(pipe->screen, indexBuffer, +                                    PIPE_BUFFER_USAGE_CPU_READ); +      draw_set_mapped_element_buffer_range(draw, indexSize, +                                           min_index, +                                           max_index, +                                           mapped_indexes); +   } +   else { +      /* no index/element buffer */ +      draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); +   } + + +   /* draw! 
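    * (Both the indexed and non-indexed paths funnel into draw_arrays();
    * any element buffer was already handed to the draw module above via
    * draw_set_mapped_element_buffer_range().)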
*/ +   draw_arrays(draw, mode, start, count); + +   /* +    * unmap vertex/index buffers - will cause draw module to flush +    */ +   for (i = 0; i < sp->num_vertex_buffers; i++) { +      draw_set_mapped_vertex_buffer(draw, i, NULL); +      pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); +   } +   if (indexBuffer) { +      draw_set_mapped_element_buffer(draw, 0, NULL); +      pipe_buffer_unmap(pipe->screen, indexBuffer); +   } + + +   /* Note: leave drawing surfaces mapped */ +   softpipe_unmap_constant_buffers(sp); + +   return TRUE; +} + +boolean +softpipe_draw_elements(struct pipe_context *pipe, +                       struct pipe_buffer *indexBuffer, +                       unsigned indexSize, +                       unsigned mode, unsigned start, unsigned count) +{ +   return softpipe_draw_range_elements( pipe, indexBuffer, +                                        indexSize, +                                        0, 0xffffffff, +                                        mode, start, count ); +} + + + +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ +   struct softpipe_context *sp = softpipe_context(pipe); +   draw_set_edgeflags(sp->draw, edgeflags); +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c new file mode 100644 index 0000000000..c21faf57f3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -0,0 +1,92 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Author: + *    Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "draw/draw_context.h" +#include "sp_flush.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" +#include "sp_winsys.h" + + +void +softpipe_flush( struct pipe_context *pipe, +		unsigned flags, +                struct pipe_fence_handle **fence ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); +   uint i; + +   draw_flush(softpipe->draw); + +   if (flags & PIPE_FLUSH_TEXTURE_CACHE) { +      for (i = 0; i < softpipe->num_textures; i++) { +         sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]); +      } +   } + +   if (flags & PIPE_FLUSH_RENDER_CACHE) { +      for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) +         if (softpipe->cbuf_cache[i]) +            sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + +      if (softpipe->zsbuf_cache) +         sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + +      /* Need this call for hardware buffers before swapbuffers. +       * +       * there should probably be another/different flush-type function +       * that's called before swapbuffers because we don't always want +       * to unmap surfaces when flushing. +       */ +      softpipe_unmap_surfaces(softpipe); +   } + +   /* Enable to dump BMPs of the color/depth buffers each frame */ +#if 0 +   if(flags & PIPE_FLUSH_FRAME) { +      static unsigned frame_no = 1; +      static char filename[256]; +      util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); +      debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]); +      util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); +      debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf); +      ++frame_no; +   } +#endif +    +   if (fence) +      *fence = NULL; +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h new file mode 100644 index 0000000000..68d9b5fa83 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -0,0 +1,37 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#ifndef SP_FLUSH_H +#define SP_FLUSH_H + +struct pipe_context; +struct pipe_fence_handle; + +void softpipe_flush(struct pipe_context *pipe, unsigned flags, +                    struct pipe_fence_handle **fence); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h new file mode 100644 index 0000000000..4792ace3a3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs.h @@ -0,0 +1,54 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_FS_H +#define SP_FS_H + +struct sp_fragment_shader * +softpipe_create_fs_exec(struct softpipe_context *softpipe, +		       const struct pipe_shader_state *templ); + +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, +		       const struct pipe_shader_state *templ); + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, +			const struct pipe_shader_state *templ); + +struct tgsi_interp_coef; +struct tgsi_exec_vector; + +void sp_setup_pos_vector(const struct tgsi_interp_coef *coef, +			 float x, float y, +			 struct tgsi_exec_vector *quadpos); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c new file mode 100644 index 0000000000..453b0373f0 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -0,0 +1,164 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" +#include "sp_headers.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" + +struct sp_exec_fragment_shader +{ +   struct sp_fragment_shader base; +}; + + +/** cast wrapper */ +static INLINE struct sp_exec_fragment_shader * +sp_exec_fragment_shader(const struct sp_fragment_shader *base) +{ +   return (struct sp_exec_fragment_shader *) base; +} + + +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * + * This should really be part of the compiled shader. + */ +void +sp_setup_pos_vector(const struct tgsi_interp_coef *coef, +		    float x, float y, +		    struct tgsi_exec_vector *quadpos) +{ +   uint chan; +   /* do X */ +   quadpos->xyzw[0].f[0] = x; +   quadpos->xyzw[0].f[1] = x + 1; +   quadpos->xyzw[0].f[2] = x; +   quadpos->xyzw[0].f[3] = x + 1; + +   /* do Y */ +   quadpos->xyzw[1].f[0] = y; +   quadpos->xyzw[1].f[1] = y; +   quadpos->xyzw[1].f[2] = y + 1; +   quadpos->xyzw[1].f[3] = y + 1; + +   /* do Z and W for all fragments in the quad */ +   for (chan = 2; chan < 4; chan++) { +      const float dadx = coef->dadx[chan]; +      const float dady = coef->dady[chan]; +      const float a0 = coef->a0[chan] + dadx * x + dady * y; +      quadpos->xyzw[chan].f[0] = a0; +      quadpos->xyzw[chan].f[1] = a0 + dadx; +      quadpos->xyzw[chan].f[2] = a0 + dady; +      quadpos->xyzw[chan].f[3] = a0 + dadx + dady; +   } +} + + +static void +exec_prepare( const struct sp_fragment_shader *base, +	      struct tgsi_exec_machine *machine, +	      struct tgsi_sampler **samplers ) +{ +   /* +    * Bind tokens/shader to the interpreter's machine state. +    * Avoid redundant binding. 
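    *
    * The check below compares the machine's currently bound token stream
    * against this shader's private copy (made with tgsi_dup_tokens() in
    * softpipe_create_fs_exec) and rebinds only when they differ.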
+    */ +   if (machine->Tokens != base->shader.tokens) { +      tgsi_exec_machine_bind_shader( machine, +                                     base->shader.tokens, +                                     PIPE_MAX_SAMPLERS, +                                     samplers ); +   } +} + + + + +/* TODO: hide the machine struct in here somewhere, remove from this + * interface: + */ +static unsigned  +exec_run( const struct sp_fragment_shader *base, +	  struct tgsi_exec_machine *machine, +	  struct quad_header *quad ) +{ + +   /* Compute X, Y, Z, W vals for this quad */ +   sp_setup_pos_vector(quad->posCoef,  +		       (float)quad->input.x0, (float)quad->input.y0,  +		       &machine->QuadPos); +    +   return tgsi_exec_machine_run( machine ); +} + + + +static void  +exec_delete( struct sp_fragment_shader *base ) +{ +   FREE((void *) base->shader.tokens); +   FREE(base); +} + + + + + +struct sp_fragment_shader * +softpipe_create_fs_exec(struct softpipe_context *softpipe, +			const struct pipe_shader_state *templ) +{ +   struct sp_exec_fragment_shader *shader; + +   /* Decide whether we'll be codegenerating this shader and if so do +    * that now. +    */ + +   shader = CALLOC_STRUCT(sp_exec_fragment_shader); +   if (!shader) +      return NULL; + +   /* we need to keep a local copy of the tokens */ +   shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); +   shader->base.prepare = exec_prepare; +   shader->base.run = exec_run; +   shader->base.delete = exec_delete; + +   return &shader->base; +} + diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c new file mode 100644 index 0000000000..34adac5226 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -0,0 +1,200 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Authors: + *   Zack Rusin + */ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_sse2.h" + +#if 0 + +struct sp_llvm_fragment_shader { +   struct sp_fragment_shader base; +   struct gallivm_prog *llvm_prog; +}; + +static void +shade_quad_llvm(struct quad_stage *qs, +                struct quad_header *quad) +{ +   struct quad_shade_stage *qss = quad_shade_stage(qs); +   struct softpipe_context *softpipe = qs->softpipe; +   float dests[4][16][4] ALIGN16_ATTRIB; +   float inputs[4][16][4] ALIGN16_ATTRIB; +   const float fx = (float) quad->x0; +   const float fy = (float) quad->y0; +   struct gallivm_prog *llvm = qss->llvm_prog; + +   inputs[0][0][0] = fx; +   inputs[1][0][0] = fx + 1.0f; +   inputs[2][0][0] = fx; +   inputs[3][0][0] = fx + 1.0f; + +   inputs[0][0][1] = fy; +   inputs[1][0][1] = fy; +   inputs[2][0][1] = fy + 1.0f; +   inputs[3][0][1] = fy + 1.0f; + + +   gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); + +#if DLLVM +   debug_printf("MASK = %d\n", quad->mask); +   for (int i = 0; i < 4; ++i) { +      for (int j = 0; j < 2; ++j) { +         debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j,  +                inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); +      } +   } +#endif + +   quad->mask &= +      gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, +                                   softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], +                                   qss->samplers); +#if DLLVM +   debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", +          dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],  +          dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); +#endif + +   /* store result color */ +   if (qss->colorOutSlot >= 0) { +      unsigned i; +      /* XXX need to handle multiple color outputs someday */ +      allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] +             == TGSI_SEMANTIC_COLOR); +      for (i = 0; i < QUAD_SIZE; ++i) { +         quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; +         quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; +         quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; +         quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; +      } +   } +#if DLLVM +   for (int i = 0; i < QUAD_SIZE; ++i) { +      debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, +             quad->outputs.color[0][0][i], +             quad->outputs.color[0][1][i], +             quad->outputs.color[0][2][i], +             quad->outputs.color[0][3][i]); +   } +#endif + +   /* store result Z */ +   if (qss->depthOutSlot >= 0) { +      /* output[slot] is new Z */ +      uint i; +      for (i = 0; i < 4; i++) { +         quad->outputs.depth[i] = dests[i][0][2]; +      } +   } +   else { +      /* copy input Z (which was interpolated by the executor) to output Z */ +      uint i; +      for (i = 0; i < 4; i++) { +         quad->outputs.depth[i] = inputs[i][0][2]; +      } +   } +#if DLLVM +   debug_printf("D [%f, %f, %f, %f] mask = %d\n", +             quad->outputs.depth[0], +             quad->outputs.depth[1], +             quad->outputs.depth[2], +             quad->outputs.depth[3], quad->mask); +#endif + +   /* shader may 
cull fragments */ +   if( quad->mask ) { +      qs->next->run( qs->next, quad ); +   } +} + + +unsigned  +run_llvm_fs( const struct sp_fragment_shader *base, +	     struct foo *machine ) +{ +} + + +void  +delete_llvm_fs( struct sp_fragment_shader *base ) +{ +   FREE(base); +} + + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, +		       const struct pipe_shader_state *templ) +{ +   struct sp_llvm_fragment_shader *shader = NULL; + +   /* LLVM fragment shaders currently disabled: +    */ +   state = CALLOC_STRUCT(sp_llvm_shader_state); +   if (!state) +      return NULL; + +   state->llvm_prog = 0; + +   if (!gallivm_global_cpu_engine()) { +      gallivm_cpu_engine_create(state->llvm_prog); +   } +   else +      gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +    +   if (shader) { +      shader->base.run = run_llvm_fs; +      shader->base.delete = delete_llvm_fs; +   } + +   return shader; +} + + +#else + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, +		       const struct pipe_shader_state *templ) +{ +   return NULL; +} + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c new file mode 100644 index 0000000000..9a273c8764 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -0,0 +1,169 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" +#include "sp_headers.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" + + +#if defined(PIPE_ARCH_X86) + +#include "rtasm/rtasm_x86sse.h" + +/* Surely this should be defined somewhere in a tgsi header: + */ +typedef void (PIPE_CDECL *codegen_function)( +   const struct tgsi_exec_vector *input, +   struct tgsi_exec_vector *output, +   const float (*constant)[4], +   struct tgsi_exec_vector *temporary, +   const struct tgsi_interp_coef *coef, +   float (*immediates)[4] +   //, const struct tgsi_exec_vector *quadPos + ); + + +struct sp_sse_fragment_shader { +   struct sp_fragment_shader base; +   struct x86_function             sse2_program; +   codegen_function func; +   float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; +}; + + + +static void +fs_sse_prepare( const struct sp_fragment_shader *base, +		struct tgsi_exec_machine *machine, +		struct tgsi_sampler **samplers ) +{ +} + + +/* TODO: codegenerate the whole run function, skip this wrapper. + * TODO: break dependency on tgsi_exec_machine struct + * TODO: push Position calculation into the generated shader + * TODO: process >1 quad at a time + */ +static unsigned  +fs_sse_run( const struct sp_fragment_shader *base, +	    struct tgsi_exec_machine *machine, +	    struct quad_header *quad ) +{ +   struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; + +   /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */ +   sp_setup_pos_vector(quad->posCoef,  +		       (float)quad->input.x0, (float)quad->input.y0,  +		       machine->Temps); + +   /* init kill mask */ +   tgsi_set_kill_mask(machine, 0x0); +   tgsi_set_exec_mask(machine, 1, 1, 1, 1); + +   shader->func( machine->Inputs, +		 machine->Outputs, +		 machine->Consts, +		 machine->Temps, +		 machine->InterpCoefs, +                 shader->immediates +		 //	 , &machine->QuadPos +      ); + +   return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); +} + + +static void  +fs_sse_delete( struct sp_fragment_shader *base ) +{ +   struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; + +   x86_release_func( &shader->sse2_program ); +   FREE(shader); +} + + +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, +		       const struct pipe_shader_state *templ) +{ +   struct sp_sse_fragment_shader *shader; + +   if (!softpipe->use_sse) +      return NULL; + +   shader = CALLOC_STRUCT(sp_sse_fragment_shader); +   if (!shader) +      return NULL; + +   x86_init_func( &shader->sse2_program ); +    +   if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, +                        shader->immediates, FALSE )) { +      FREE(shader); +      return NULL; +   } + +   shader->func = (codegen_function) x86_get_func( &shader->sse2_program ); +   if (!shader->func) { +      x86_release_func( &shader->sse2_program ); +      FREE(shader); +      return NULL; +   } + +   shader->base.shader = *templ; +   shader->base.prepare = fs_sse_prepare; +   shader->base.run = fs_sse_run; +   shader->base.delete = fs_sse_delete; + +   return &shader->base; +} + + +#else + +/* Maybe put this varient in the header file. 
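 *
 * On non-x86 builds this variant simply returns NULL, matching what the
 * LLVM path does, so the caller presumably falls back to the interpreted
 * shader from sp_fs_exec.c.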
+ */ +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, +		       const struct pipe_shader_state *templ) +{ +   return NULL; +} + +#endif diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h new file mode 100644 index 0000000000..4a42cb3c19 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -0,0 +1,95 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_HEADERS_H +#define SP_HEADERS_H + +#include "pipe/p_state.h" +#include "tgsi/tgsi_exec.h" + +#define PRIM_POINT 1 +#define PRIM_LINE  2 +#define PRIM_TRI   3 + + +/* The rasterizer generates 2x2 quads of fragment and feeds them to + * the current fp_machine (see below). + * Remember that Y=0=top with Y increasing down the window. + */ +#define QUAD_TOP_LEFT     0 +#define QUAD_TOP_RIGHT    1 +#define QUAD_BOTTOM_LEFT  2 +#define QUAD_BOTTOM_RIGHT 3 + +#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL          0xf + + +/** + * Encodes everything we need to know about a 2x2 pixel block.  Uses + * "Channel-Serial" or "SoA" layout.   + */ +struct quad_header_input +{ +   int x0; +   int y0; +   float coverage[QUAD_SIZE];    /** fragment coverage for antialiasing */ +   unsigned facing:1;   /**< Front (0) or back (1) facing? 
*/ +   unsigned prim:2;     /**< PRIM_POINT, LINE, TRI */ +}; + +struct quad_header_inout +{ +   unsigned mask:4; +}; + +struct quad_header_output +{ +   /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ +   float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; +   float depth[QUAD_SIZE]; +}; + +struct quad_header { +   struct quad_header_input input; +   struct quad_header_inout inout; +   struct quad_header_output output; + +   const struct tgsi_interp_coef *coef; +   const struct tgsi_interp_coef *posCoef; + +   unsigned nr_attrs; +}; + +#endif /* SP_HEADERS_H */ + diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c new file mode 100644 index 0000000000..038ff04d4f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -0,0 +1,190 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \brief A draw stage that drives our triangle setup routines from + * within the draw pipeline.  One of two ways to drive setup, the + * other being in sp_prim_vbuf.c. + * + * \author  Keith Whitwell <keith@tungstengraphics.com> + * \author  Brian Paul + */ + + +#include "sp_context.h" +#include "sp_setup.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_pipe.h" +#include "draw/draw_vertex.h" +#include "util/u_memory.h" + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { +   struct draw_stage stage; /**< This must be first (base class) */ + +   struct setup_context *setup; +}; + + + +/** + * Basically a cast wrapper. 
+ */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ +   return (struct setup_stage *)stage; +} + + +typedef const float (*cptrf4)[4]; + +static void +do_tri(struct draw_stage *stage, struct prim_header *prim) +{ +   struct setup_stage *setup = setup_stage( stage ); +    +   setup_tri( setup->setup, +              (cptrf4)prim->v[0]->data, +              (cptrf4)prim->v[1]->data, +              (cptrf4)prim->v[2]->data ); +} + +static void +do_line(struct draw_stage *stage, struct prim_header *prim) +{ +   struct setup_stage *setup = setup_stage( stage ); + +   setup_line( setup->setup, +               (cptrf4)prim->v[0]->data, +               (cptrf4)prim->v[1]->data ); +} + +static void +do_point(struct draw_stage *stage, struct prim_header *prim) +{ +   struct setup_stage *setup = setup_stage( stage ); + +   setup_point( setup->setup, +                (cptrf4)prim->v[0]->data ); +} + + + + +static void setup_begin( struct draw_stage *stage ) +{ +   struct setup_stage *setup = setup_stage(stage); + +   setup_prepare( setup->setup ); + +   stage->point = do_point; +   stage->line = do_line; +   stage->tri = do_tri; +} + + +static void setup_first_point( struct draw_stage *stage, +			       struct prim_header *header ) +{ +   setup_begin(stage); +   stage->point( stage, header ); +} + +static void setup_first_line( struct draw_stage *stage, +			       struct prim_header *header ) +{ +   setup_begin(stage); +   stage->line( stage, header ); +} + + +static void setup_first_tri( struct draw_stage *stage, +			       struct prim_header *header ) +{ +   setup_begin(stage); +   stage->tri( stage, header ); +} + + + +static void setup_flush( struct draw_stage *stage, +			 unsigned flags ) +{ +   stage->point = setup_first_point; +   stage->line = setup_first_line; +   stage->tri = setup_first_tri; +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + + +static void render_destroy( struct draw_stage *stage ) +{ +   struct setup_stage *ssetup = setup_stage(stage); +   setup_destroy_context(ssetup->setup); +   FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. + */ +struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) +{ +   struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); + +   sstage->setup = setup_create_context(softpipe); +   sstage->stage.draw = softpipe->draw; +   sstage->stage.point = setup_first_point; +   sstage->stage.line = setup_first_line; +   sstage->stage.tri = setup_first_tri; +   sstage->stage.flush = setup_flush; +   sstage->stage.reset_stipple_counter = reset_stipple_counter; +   sstage->stage.destroy = render_destroy; + +   return (struct draw_stage *)sstage; +} + +struct setup_context * +sp_draw_setup_context( struct draw_stage *stage ) +{ +   struct setup_stage *ssetup = setup_stage(stage); +   return ssetup->setup; +} + +void +sp_draw_flush( struct draw_stage *stage ) +{ +   stage->flush( stage, 0 ); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h new file mode 100644 index 0000000000..49bdd98ed8 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -0,0 +1,85 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + + +#ifndef SP_PRIM_SETUP_H +#define SP_PRIM_SETUP_H + + +/** + * vbuf is a special stage to gather the stream of triangles, lines, points + * together and reconstruct vertex buffers for hardware upload. + * + * First attempt, work in progress. + *  + * TODO: + *    - separate out vertex buffer building and primitive emit, ie >1 draw per vb. + *    - tell vbuf stage how to build hw vertices directly + *    - pass vbuf stage a buffer pointer for direct emit to agp/vram. + * + * + * + * Vertices are just an array of floats, with all the attributes + * packed.  We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + + +struct draw_stage; +struct softpipe_context; + + +typedef void (*vbuf_draw_func)( struct pipe_context *pipe, +                                unsigned prim, +                                const ushort *elements, +                                unsigned nr_elements, +                                const void *vertex_buffer, +                                unsigned nr_vertices ); + + +extern struct draw_stage * +sp_draw_render_stage( struct softpipe_context *softpipe ); + +extern struct setup_context * +sp_draw_setup_context( struct draw_stage * ); + +extern void +sp_draw_flush( struct draw_stage * ); + + +extern struct draw_stage * +sp_draw_vbuf_stage( struct draw_context *draw_context, +                    struct pipe_context *pipe, +                    vbuf_draw_func draw ); + + +#endif /* SP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c new file mode 100644 index 0000000000..9cd5784e5b --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -0,0 +1,410 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
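[Editor's illustration] The sp_prim_setup.h comment above describes the packed post-transform vertex layout: each vertex is a run of 4-float attributes, with attr[0] holding the window position and the enabled attributes packed contiguously. A tiny standalone sketch of indexing that layout, assuming exactly the layout the comment describes (the helper name and sample data are invented):

#include <stdio.h>

/* Index into a packed vertex buffer laid out as the header comment above
 * describes: num_attrs groups of 4 floats per vertex, attr 0 = window pos. */
static const float *
packed_attr(const float *verts, unsigned num_attrs,
            unsigned vertex, unsigned attr)
{
   return verts + (vertex * num_attrs + attr) * 4;
}

int main(void)
{
   /* two vertices, two attributes each (position + color), 4 floats per attr */
   static const float verts[2 * 2 * 4] = {
      /* v0 */ 10, 20, 0, 1,   1, 0, 0, 1,
      /* v1 */ 30, 40, 0, 1,   0, 1, 0, 1,
   };
   const float *pos1   = packed_attr(verts, 2, 1, 0);
   const float *color1 = packed_attr(verts, 2, 1, 1);
   printf("v1 pos = %g,%g  color = %g,%g,%g\n",
          pos1[0], pos1[1], color1[0], color1[1], color1[2]);
   return 0;
}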
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Interface between 'draw' module's output and the softpipe rasterizer/setup + * code.  When the 'draw' module has finished filling a vertex buffer, the + * draw_arrays() functions below will be called.  Loop over the vertices and + * call the point/line/tri setup functions. + * + * Authors + *  Brian Paul + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_prim_vbuf.h" +#include "sp_prim_setup.h" +#include "sp_setup.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "util/u_memory.h" + + +#define SP_MAX_VBUF_INDEXES 1024 +#define SP_MAX_VBUF_SIZE    4096 + +typedef const float (*cptrf4)[4]; + +/** + * Subclass of vbuf_render. + */ +struct softpipe_vbuf_render +{ +   struct vbuf_render base; +   struct softpipe_context *softpipe; +   uint prim; +   uint vertex_size; +   void *vertex_buffer; +}; + + +/** cast wrapper */ +static struct softpipe_vbuf_render * +softpipe_vbuf_render(struct vbuf_render *vbr) +{ +   return (struct softpipe_vbuf_render *) vbr; +} + + +static const struct vertex_info * +sp_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ +   struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); +   return softpipe_get_vbuf_vertex_info(cvbr->softpipe); +} + + +static void * +sp_vbuf_allocate_vertices(struct vbuf_render *vbr, +                            ushort vertex_size, ushort nr_vertices) +{ +   struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); +   assert(!cvbr->vertex_buffer); +   cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); +   cvbr->vertex_size = vertex_size; +   return cvbr->vertex_buffer; +} + + +static void +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, +                           unsigned vertex_size, unsigned vertices_used) +{ +   struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); +   align_free(vertices); +   assert(vertices == cvbr->vertex_buffer); +   cvbr->vertex_buffer = NULL; +} + + +static boolean +sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ +   struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + +   /* XXX: break this dependency - make setup_context live under +    * softpipe, rename the old "setup" draw stage to something else. 
+    */ +   struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup); +    +   setup_prepare( setup_ctx ); + + + +   cvbr->prim = prim; +   return TRUE; + +} + + +static INLINE cptrf4 get_vert( const void *vertex_buffer, +                               int index, +                               int stride ) +{ +   return (cptrf4)((char *)vertex_buffer + index * stride); +} + + +/** + * draw elements / indexed primitives + */ +static void +sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +{ +   struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); +   struct softpipe_context *softpipe = cvbr->softpipe; +   const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); +   const void *vertex_buffer = cvbr->vertex_buffer; +   unsigned i; + +   /* XXX: break this dependency - make setup_context live under +    * softpipe, rename the old "setup" draw stage to something else. +    */ +   struct draw_stage *setup = softpipe->setup; +   struct setup_context *setup_ctx = sp_draw_setup_context(setup); + +   switch (cvbr->prim) { +   case PIPE_PRIM_POINTS: +      for (i = 0; i < nr; i++) { +         setup_point( setup_ctx, +                      get_vert(vertex_buffer, indices[i-0], stride) ); +      } +      break; + +   case PIPE_PRIM_LINES: +      for (i = 1; i < nr; i += 2) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, indices[i-1], stride), +                     get_vert(vertex_buffer, indices[i-0], stride) ); +      } +      break; + +   case PIPE_PRIM_LINE_STRIP: +      for (i = 1; i < nr; i ++) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, indices[i-1], stride), +                     get_vert(vertex_buffer, indices[i-0], stride) ); +      } +      break; + +   case PIPE_PRIM_LINE_LOOP: +      for (i = 1; i < nr; i ++) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, indices[i-1], stride), +                     get_vert(vertex_buffer, indices[i-0], stride) ); +      } +      if (nr) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, indices[nr-1], stride), +                     get_vert(vertex_buffer, indices[0], stride) ); +      } +      break; + + +   case PIPE_PRIM_TRIANGLES: +      for (i = 2; i < nr; i += 3) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, indices[i-2], stride), +                    get_vert(vertex_buffer, indices[i-1], stride), +                    get_vert(vertex_buffer, indices[i-0], stride)); +      } +      break; + +   case PIPE_PRIM_TRIANGLE_STRIP: +      for (i = 2; i < nr; i += 1) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, indices[i+(i&1)-2], stride), +                    get_vert(vertex_buffer, indices[i-(i&1)-1], stride), +                    get_vert(vertex_buffer, indices[i-0], stride)); +      } +      break; + +   case PIPE_PRIM_TRIANGLE_FAN: +   case PIPE_PRIM_POLYGON: +      for (i = 2; i < nr; i += 1) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, indices[0], stride), +                    get_vert(vertex_buffer, indices[i-1], stride), +                    get_vert(vertex_buffer, indices[i-0], stride)); +      } +      break; +   case PIPE_PRIM_QUADS: +      for (i = 3; i < nr; i += 4) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, indices[i-3], stride), +                    get_vert(vertex_buffer, indices[i-2], stride), +               
     get_vert(vertex_buffer, indices[i-0], stride)); + +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, indices[i-2], stride), +                    get_vert(vertex_buffer, indices[i-1], stride), +                    get_vert(vertex_buffer, indices[i-0], stride)); +      } +      break; +   case PIPE_PRIM_QUAD_STRIP: +      for (i = 3; i < nr; i += 2) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, indices[i-3], stride), +                    get_vert(vertex_buffer, indices[i-2], stride), +                    get_vert(vertex_buffer, indices[i-0], stride)); + +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, indices[i-1], stride), +                    get_vert(vertex_buffer, indices[i-3], stride), +                    get_vert(vertex_buffer, indices[i-0], stride)); +      } +      break; +   default: +      assert(0); +   } + +   /* XXX: why are we calling this???  If we had to call something, it +    * would be a function in sp_setup.c: +    */ +   sp_draw_flush( setup ); +} + + +/** + * This function is hit when the draw module is working in pass-through mode. + * It's up to us to convert the vertex array into point/line/tri prims. + */ +static void +sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ +   struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); +   struct softpipe_context *softpipe = cvbr->softpipe; +   const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); +   const void *vertex_buffer = +      (void *) get_vert(cvbr->vertex_buffer, start, stride); +   unsigned i; + +   /* XXX: break this dependency - make setup_context live under +    * softpipe, rename the old "setup" draw stage to something else. +    */ +   struct draw_stage *setup = softpipe->setup; +   struct setup_context *setup_ctx = sp_draw_setup_context(setup); + +   switch (cvbr->prim) { +   case PIPE_PRIM_POINTS: +      for (i = 0; i < nr; i++) { +         setup_point( setup_ctx, +                      get_vert(vertex_buffer, i-0, stride) ); +      } +      break; + +   case PIPE_PRIM_LINES: +      for (i = 1; i < nr; i += 2) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, i-1, stride), +                     get_vert(vertex_buffer, i-0, stride) ); +      } +      break; + +   case PIPE_PRIM_LINE_STRIP: +      for (i = 1; i < nr; i ++) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, i-1, stride), +                     get_vert(vertex_buffer, i-0, stride) ); +      } +      break; + +   case PIPE_PRIM_LINE_LOOP: +      for (i = 1; i < nr; i ++) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, i-1, stride), +                     get_vert(vertex_buffer, i-0, stride) ); +      } +      if (nr) { +         setup_line( setup_ctx, +                     get_vert(vertex_buffer, nr-1, stride), +                     get_vert(vertex_buffer, 0, stride) ); +      } +      break; + + +   case PIPE_PRIM_TRIANGLES: +      for (i = 2; i < nr; i += 3) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, i-2, stride), +                    get_vert(vertex_buffer, i-1, stride), +                    get_vert(vertex_buffer, i-0, stride)); +      } +      break; + +   case PIPE_PRIM_TRIANGLE_STRIP: +      for (i = 2; i < nr; i += 1) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, i+(i&1)-2, stride), +                    get_vert(vertex_buffer, i-(i&1)-1, 
stride), +                    get_vert(vertex_buffer, i-0, stride)); +      } +      break; + +   case PIPE_PRIM_TRIANGLE_FAN: +   case PIPE_PRIM_POLYGON: +      for (i = 2; i < nr; i += 1) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, 0, stride), +                    get_vert(vertex_buffer, i-1, stride), +                    get_vert(vertex_buffer, i-0, stride)); +      } +      break; +   case PIPE_PRIM_QUADS: +      for (i = 3; i < nr; i += 4) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, i-3, stride), +                    get_vert(vertex_buffer, i-2, stride), +                    get_vert(vertex_buffer, i-0, stride)); + +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, i-2, stride), +                    get_vert(vertex_buffer, i-1, stride), +                    get_vert(vertex_buffer, i-0, stride)); +      } +      break; +   case PIPE_PRIM_QUAD_STRIP: +      for (i = 3; i < nr; i += 2) { +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, i-3, stride), +                    get_vert(vertex_buffer, i-2, stride), +                    get_vert(vertex_buffer, i-0, stride)); + +         setup_tri( setup_ctx, +                    get_vert(vertex_buffer, i-1, stride), +                    get_vert(vertex_buffer, i-3, stride), +                    get_vert(vertex_buffer, i-0, stride)); +      } +      break; +   default: +      assert(0); +   } +} + + + +static void +sp_vbuf_destroy(struct vbuf_render *vbr) +{ +   struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); +   cvbr->softpipe->vbuf_render = NULL; +   FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +sp_init_vbuf(struct softpipe_context *sp) +{ +   assert(sp->draw); + +   sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render); + +   sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES; +   sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; + +   sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; +   sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; +   sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; +   sp->vbuf_render->base.draw = sp_vbuf_draw; +   sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays; +   sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; +   sp->vbuf_render->base.destroy = sp_vbuf_destroy; + +   sp->vbuf_render->softpipe = sp; + +   sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); + +   draw_set_rasterize_stage(sp->draw, sp->vbuf); + +   draw_set_render(sp->draw, &sp->vbuf_render->base); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h new file mode 100644 index 0000000000..1de9cc2a89 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
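[Editor's illustration] The index arithmetic in sp_vbuf_draw() and sp_vbuf_draw_arrays() above is compact, particularly the (i & 1) parity trick for triangle strips and the two-triangle quad split. The standalone harness below (not driver code) just prints which vertex indices those loops would hand to setup_tri(), so the decomposition can be checked by eye.

#include <stdio.h>

/* Print the triangles the loops above would emit for a strip and for quads. */
static void emit_tri(unsigned a, unsigned b, unsigned c)
{
   printf("  tri %u %u %u\n", a, b, c);
}

static void decompose_tri_strip(unsigned nr)
{
   unsigned i;
   printf("triangle strip, %u verts:\n", nr);
   for (i = 2; i < nr; i++)
      emit_tri(i + (i & 1) - 2,   /* parity flip keeps consistent winding */
               i - (i & 1) - 1,
               i);
}

static void decompose_quads(unsigned nr)
{
   unsigned i;
   printf("quads, %u verts:\n", nr);
   for (i = 3; i < nr; i += 4) {
      /* each quad (i-3, i-2, i-1, i) becomes two triangles */
      emit_tri(i - 3, i - 2, i);
      emit_tri(i - 2, i - 1, i);
   }
}

int main(void)
{
   decompose_tri_strip(6);   /* expect: 0 1 2, 2 1 3, 2 3 4, 4 3 5 */
   decompose_quads(8);       /* expect: 0 1 3, 1 2 3, 4 5 7, 5 6 7 */
   return 0;
}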
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SP_VBUF_H +#define SP_VBUF_H + + +struct softpipe_context; + +extern void +sp_init_vbuf(struct softpipe_context *softpipe); + + +#endif /* SP_VBUF_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c new file mode 100644 index 0000000000..892ef87ee9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -0,0 +1,118 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/p_shader_tokens.h" + +static void +sp_push_quad_first( +   struct softpipe_context *sp, +   struct quad_stage *quad, +   uint i ) +{ +   quad->next = sp->quad[i].first; +   sp->quad[i].first = quad; +} + +static void +sp_build_depth_stencil( +   struct softpipe_context *sp, +   uint i ) +{ +   if (sp->depth_stencil->stencil[0].enabled || +       sp->depth_stencil->stencil[1].enabled) { +      sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); +   } +   else if (sp->depth_stencil->depth.enabled && +            sp->framebuffer.zsbuf) { +      sp_push_quad_first( sp, sp->quad[i].depth_test, i ); +   } +} + +void +sp_build_quad_pipeline(struct softpipe_context *sp) +{ +   uint i; + +   boolean early_depth_test = +               sp->depth_stencil->depth.enabled && +               sp->framebuffer.zsbuf && +               !sp->depth_stencil->alpha.enabled && +               !sp->fs->info.uses_kill && +               !sp->fs->info.writes_z; + +   /* build up the pipeline in reverse order... */ +   for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { +      sp->quad[i].first = sp->quad[i].output; + +      if (sp->blend->colormask != 0xf) { +         sp_push_quad_first( sp, sp->quad[i].colormask, i ); +      } + +      if (sp->blend->blend_enable || +          sp->blend->logicop_enable) { +         sp_push_quad_first( sp, sp->quad[i].blend, i ); +      } + +      if (sp->depth_stencil->depth.occlusion_count) { +         sp_push_quad_first( sp, sp->quad[i].occlusion, i ); +      } + +      if (sp->rasterizer->poly_smooth || +          sp->rasterizer->line_smooth || +          sp->rasterizer->point_smooth) { +         sp_push_quad_first( sp, sp->quad[i].coverage, i ); +      } + +      if (!early_depth_test) { +         sp_build_depth_stencil( sp, i ); +      } + +      if (sp->depth_stencil->alpha.enabled) { +         sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); +      } + +      /* XXX always enable shader? */ +      if (1) { +         sp_push_quad_first( sp, sp->quad[i].shade, i ); +      } + +      if (early_depth_test) { +         sp_build_depth_stencil( sp, i ); +         sp_push_quad_first( sp, sp->quad[i].earlyz, i ); +      } + +#if !USE_DRAW_STAGE_PSTIPPLE +      if (sp->rasterizer->poly_stipple_enable) { +         sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); +      } +#endif +   } +} + diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h new file mode 100644 index 0000000000..08513cb95f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -0,0 +1,69 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
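[Editor's illustration] sp_build_quad_pipeline() above assembles the per-quad stage chain "in reverse order": sp_push_quad_first() pushes each stage onto the head of a singly linked list whose tail is always the output stage, so the stage pushed last is the one that runs first. A minimal standalone sketch of that idiom (names invented for the example):

#include <stdio.h>

struct toy_stage {
   const char *name;
   struct toy_stage *next;
};

/* same idea as sp_push_quad_first(): prepend to the chain */
static void push_first(struct toy_stage **first, struct toy_stage *s)
{
   s->next = *first;
   *first = s;
}

int main(void)
{
   struct toy_stage output = { "output", NULL };
   struct toy_stage blend  = { "blend",  NULL };
   struct toy_stage depth  = { "depth",  NULL };
   struct toy_stage shade  = { "shade",  NULL };
   struct toy_stage *first = &output;   /* output is always the tail */
   struct toy_stage *s;

   /* pushed back-to-front, as in the driver code */
   push_first(&first, &blend);
   push_first(&first, &depth);
   push_first(&first, &shade);

   /* walking the list gives front-to-back execution order */
   for (s = first; s; s = s->next)
      printf("%s\n", s->name);   /* shade, depth, blend, output */
   return 0;
}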
+ *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_QUAD_H +#define SP_QUAD_H + + +struct softpipe_context; +struct quad_header; + + +struct quad_stage { +   struct softpipe_context *softpipe; + +   struct quad_stage *next; + +   void (*begin)(struct quad_stage *qs); + +   /** the stage action */ +   void (*run)(struct quad_stage *qs, struct quad_header *quad); + +   void (*destroy)(struct quad_stage *qs); +}; + + +struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); + +void sp_build_quad_pipeline(struct softpipe_context *sp); + +void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); + +#endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c new file mode 100644 index 0000000000..85c9f037a3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -0,0 +1,108 @@ + +/** + * quad alpha test + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +static void +alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   struct softpipe_context *softpipe = qs->softpipe; +   const float ref = softpipe->depth_stencil->alpha.ref_value; +   unsigned passMask = 0x0, j; +   const uint cbuf = 0; /* only output[0].alpha is tested */ +   const float *aaaa = quad->output.color[cbuf][3]; + +   switch (softpipe->depth_stencil->alpha.func) { +   case PIPE_FUNC_NEVER: +      break; +   case PIPE_FUNC_LESS: +      /* +       * If mask were an array [4] we could do this SIMD-style: +       * passMask = (quad->outputs.color[0][3] <= vec4(ref)); +       */ +      for (j = 0; j < QUAD_SIZE; j++) { +         if (aaaa[j] < ref) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_EQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (aaaa[j] == ref) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_LEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (aaaa[j] <= ref) { +            
passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_GREATER: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (aaaa[j] > ref) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_NOTEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (aaaa[j] != ref) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_GEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (aaaa[j] >= ref) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_ALWAYS: +      passMask = MASK_ALL; +      break; +   default: +      assert(0); +   } + +   quad->inout.mask &= passMask; + +   if (quad->inout.mask) +      qs->next->run(qs->next, quad); +} + + +static void alpha_test_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void alpha_test_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage * +sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = alpha_test_begin; +   stage->run = alpha_test_quad; +   stage->destroy = alpha_test_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c new file mode 100644 index 0000000000..fb1d430a4f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -0,0 +1,759 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
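[Editor's illustration] The quad_stage interface declared in sp_quad.h above (begin/run/destroy plus a next pointer) is what every per-fragment stage implements; the alpha-test stage just shown is a typical example. Below is a purely illustrative do-nothing stage written against that interface. It is not part of this commit, would only build inside the softpipe source tree, and the function names are invented.

#include "sp_context.h"
#include "sp_quad.h"
#include "util/u_memory.h"

static void passthrough_begin(struct quad_stage *qs)
{
   /* propagate begin() down the chain, like every other stage */
   qs->next->begin(qs->next);
}

static void passthrough_run(struct quad_stage *qs, struct quad_header *quad)
{
   /* a real stage would inspect or modify the quad here (e.g. its
    * inout.mask or output colors) before passing it along */
   qs->next->run(qs->next, quad);
}

static void passthrough_destroy(struct quad_stage *qs)
{
   FREE(qs);
}

/* hypothetical constructor, mirroring sp_quad_alpha_test_stage() above */
struct quad_stage *
sp_quad_passthrough_stage(struct softpipe_context *softpipe)
{
   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);

   stage->softpipe = softpipe;
   stage->begin = passthrough_begin;
   stage->run = passthrough_run;
   stage->destroy = passthrough_destroy;
   return stage;
}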
+ *  + **************************************************************************/ + +/** + * quad blending + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" + + +#define VEC4_COPY(DST, SRC) \ +do { \ +    DST[0] = SRC[0]; \ +    DST[1] = SRC[1]; \ +    DST[2] = SRC[2]; \ +    DST[3] = SRC[3]; \ +} while(0) + +#define VEC4_SCALAR(DST, SRC) \ +do { \ +    DST[0] = SRC; \ +    DST[1] = SRC; \ +    DST[2] = SRC; \ +    DST[3] = SRC; \ +} while(0) + +#define VEC4_ADD(R, A, B) \ +do { \ +   R[0] = A[0] + B[0]; \ +   R[1] = A[1] + B[1]; \ +   R[2] = A[2] + B[2]; \ +   R[3] = A[3] + B[3]; \ +} while (0) + +#define VEC4_SUB(R, A, B) \ +do { \ +   R[0] = A[0] - B[0]; \ +   R[1] = A[1] - B[1]; \ +   R[2] = A[2] - B[2]; \ +   R[3] = A[3] - B[3]; \ +} while (0) + +#define VEC4_MUL(R, A, B) \ +do { \ +   R[0] = A[0] * B[0]; \ +   R[1] = A[1] * B[1]; \ +   R[2] = A[2] * B[2]; \ +   R[3] = A[3] * B[3]; \ +} while (0) + +#define VEC4_MIN(R, A, B) \ +do { \ +   R[0] = (A[0] < B[0]) ? A[0] : B[0]; \ +   R[1] = (A[1] < B[1]) ? A[1] : B[1]; \ +   R[2] = (A[2] < B[2]) ? A[2] : B[2]; \ +   R[3] = (A[3] < B[3]) ? A[3] : B[3]; \ +} while (0) + +#define VEC4_MAX(R, A, B) \ +do { \ +   R[0] = (A[0] > B[0]) ? A[0] : B[0]; \ +   R[1] = (A[1] > B[1]) ? A[1] : B[1]; \ +   R[2] = (A[2] > B[2]) ? A[2] : B[2]; \ +   R[3] = (A[3] > B[3]) ? A[3] : B[3]; \ +} while (0) + + + +static void +logicop_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   struct softpipe_context *softpipe = qs->softpipe; +   uint cbuf; + +   /* loop over colorbuffer outputs */ +   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { +      float dest[4][QUAD_SIZE]; +      ubyte src[4][4], dst[4][4], res[4][4]; +      uint *src4 = (uint *) src; +      uint *dst4 = (uint *) dst; +      uint *res4 = (uint *) res; +      struct softpipe_cached_tile * +         tile = sp_get_cached_tile(softpipe, +                                   softpipe->cbuf_cache[cbuf], +                                   quad->input.x0, quad->input.y0); +      float (*quadColor)[4] = quad->output.color[cbuf]; +      uint i, j; + +      /* get/swizzle dest colors */ +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); +         int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); +         for (i = 0; i < 4; i++) { +            dest[i][j] = tile->data.color[y][x][i]; +         } +      } + +      /* convert to ubyte */ +      for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ +         dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ +         dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ +         dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ +         dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + +         src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ +         src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ +         src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ +         src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ +      } + +      switch (softpipe->blend->logicop_func) { +      case PIPE_LOGICOP_CLEAR: +         for (j = 0; j < 4; j++) +            res4[j] = 0; +         break; +      case PIPE_LOGICOP_NOR: +         for (j = 0; j < 4; j++) +            res4[j] = ~(src4[j] | dst4[j]); +         break; +      case PIPE_LOGICOP_AND_INVERTED: +         for (j = 0; j < 4; j++) 
+            res4[j] = ~src4[j] & dst4[j]; +         break; +      case PIPE_LOGICOP_COPY_INVERTED: +         for (j = 0; j < 4; j++) +            res4[j] = ~src4[j]; +         break; +      case PIPE_LOGICOP_AND_REVERSE: +         for (j = 0; j < 4; j++) +            res4[j] = src4[j] & ~dst4[j]; +         break; +      case PIPE_LOGICOP_INVERT: +         for (j = 0; j < 4; j++) +            res4[j] = ~dst4[j]; +         break; +      case PIPE_LOGICOP_XOR: +         for (j = 0; j < 4; j++) +            res4[j] = dst4[j] ^ src4[j]; +         break; +      case PIPE_LOGICOP_NAND: +         for (j = 0; j < 4; j++) +            res4[j] = ~(src4[j] & dst4[j]); +         break; +      case PIPE_LOGICOP_AND: +         for (j = 0; j < 4; j++) +            res4[j] = src4[j] & dst4[j]; +         break; +      case PIPE_LOGICOP_EQUIV: +         for (j = 0; j < 4; j++) +            res4[j] = ~(src4[j] ^ dst4[j]); +         break; +      case PIPE_LOGICOP_NOOP: +         for (j = 0; j < 4; j++) +            res4[j] = dst4[j]; +         break; +      case PIPE_LOGICOP_OR_INVERTED: +         for (j = 0; j < 4; j++) +            res4[j] = ~src4[j] | dst4[j]; +         break; +      case PIPE_LOGICOP_COPY: +         for (j = 0; j < 4; j++) +            res4[j] = src4[j]; +         break; +      case PIPE_LOGICOP_OR_REVERSE: +         for (j = 0; j < 4; j++) +            res4[j] = src4[j] | ~dst4[j]; +         break; +      case PIPE_LOGICOP_OR: +         for (j = 0; j < 4; j++) +            res4[j] = src4[j] | dst4[j]; +         break; +      case PIPE_LOGICOP_SET: +         for (j = 0; j < 4; j++) +            res4[j] = ~0; +         break; +      default: +         assert(0); +      } + +      for (j = 0; j < 4; j++) { +         quadColor[j][0] = ubyte_to_float(res[j][0]); +         quadColor[j][1] = ubyte_to_float(res[j][1]); +         quadColor[j][2] = ubyte_to_float(res[j][2]); +         quadColor[j][3] = ubyte_to_float(res[j][3]); +      } +   } + +   /* pass quad to next stage */ +   qs->next->run(qs->next, quad); +} + + + + +static void +blend_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   static const float zero[4] = { 0, 0, 0, 0 }; +   static const float one[4] = { 1, 1, 1, 1 }; + +   struct softpipe_context *softpipe = qs->softpipe; +   uint cbuf; + +   if (softpipe->blend->logicop_enable) { +      logicop_quad(qs, quad); +      return; +   } + +   /* loop over colorbuffer outputs */ +   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { +      float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; +      struct softpipe_cached_tile *tile +         = sp_get_cached_tile(softpipe, +                              softpipe->cbuf_cache[cbuf], +                              quad->input.x0, quad->input.y0); +      float (*quadColor)[4] = quad->output.color[cbuf]; +      uint i, j; + +      /* get/swizzle dest colors */ +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); +         int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); +         for (i = 0; i < 4; i++) { +            dest[i][j] = tile->data.color[y][x][i]; +         } +      } + +      /* +       * Compute src/first term RGB +       */ +      switch (softpipe->blend->rgb_src_factor) { +      case PIPE_BLENDFACTOR_ONE: +         VEC4_COPY(source[0], quadColor[0]); /* R */ +         VEC4_COPY(source[1], quadColor[1]); /* G */ +         VEC4_COPY(source[2], quadColor[2]); /* B */ +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         VEC4_MUL(source[0], 
quadColor[0], quadColor[0]); /* R */ +         VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ +         VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         { +            const float *alpha = quadColor[3]; +            VEC4_MUL(source[0], quadColor[0], alpha); /* R */ +            VEC4_MUL(source[1], quadColor[1], alpha); /* G */ +            VEC4_MUL(source[2], quadColor[2], alpha); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ +         VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ +         VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ +         break; +      case PIPE_BLENDFACTOR_DST_ALPHA: +         { +            const float *alpha = dest[3]; +            VEC4_MUL(source[0], quadColor[0], alpha); /* R */ +            VEC4_MUL(source[1], quadColor[1], alpha); /* G */ +            VEC4_MUL(source[2], quadColor[2], alpha); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +         { +            const float *alpha = quadColor[3]; +            float diff[4], temp[4]; +            VEC4_SUB(diff, one, dest[3]); +            VEC4_MIN(temp, alpha, diff); +            VEC4_MUL(source[0], quadColor[0], temp); /* R */ +            VEC4_MUL(source[1], quadColor[1], temp); /* G */ +            VEC4_MUL(source[2], quadColor[2], temp); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         { +            float comp[4]; +            VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ +            VEC4_MUL(source[0], quadColor[0], comp); /* R */ +            VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ +            VEC4_MUL(source[1], quadColor[1], comp); /* G */ +            VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ +            VEC4_MUL(source[2], quadColor[2], comp); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         { +            float alpha[4]; +            VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); +            VEC4_MUL(source[0], quadColor[0], alpha); /* R */ +            VEC4_MUL(source[1], quadColor[1], alpha); /* G */ +            VEC4_MUL(source[2], quadColor[2], alpha); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_SRC1_COLOR: +         assert(0); /* to do */ +         break; +      case PIPE_BLENDFACTOR_SRC1_ALPHA: +         assert(0); /* to do */ +         break; +      case PIPE_BLENDFACTOR_ZERO: +         VEC4_COPY(source[0], zero); /* R */ +         VEC4_COPY(source[1], zero); /* G */ +         VEC4_COPY(source[2], zero); /* B */ +         break; +      case PIPE_BLENDFACTOR_INV_SRC_COLOR: +         { +            float inv_comp[4]; +            VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ +            VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ +            VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ +            VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ +            VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ +            VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +         { +            float inv_alpha[4]; +            VEC4_SUB(inv_alpha, one, quadColor[3]); +            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ +            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ +            
VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_DST_ALPHA: +         { +            float inv_alpha[4]; +            VEC4_SUB(inv_alpha, one, dest[3]); +            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ +            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ +            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_DST_COLOR: +         { +            float inv_comp[4]; +            VEC4_SUB(inv_comp, one, dest[0]); /* R */ +            VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ +            VEC4_SUB(inv_comp, one, dest[1]); /* G */ +            VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ +            VEC4_SUB(inv_comp, one, dest[2]); /* B */ +            VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_CONST_COLOR: +         { +            float inv_comp[4]; +            /* R */ +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); +            VEC4_MUL(source[0], quadColor[0], inv_comp); +            /* G */ +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); +            VEC4_MUL(source[1], quadColor[1], inv_comp); +            /* B */ +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); +            VEC4_MUL(source[2], quadColor[2], inv_comp); +         } +         break; +      case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +         { +            float inv_alpha[4]; +            VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); +            VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ +            VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ +            VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_SRC1_COLOR: +         assert(0); /* to do */ +         break; +      case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: +         assert(0); /* to do */ +         break; +      default: +         assert(0); +      } + +      /* +       * Compute src/first term A +       */ +      switch (softpipe->blend->alpha_src_factor) { +      case PIPE_BLENDFACTOR_ONE: +         VEC4_COPY(source[3], quadColor[3]); /* A */ +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         { +            const float *alpha = quadColor[3]; +            VEC4_MUL(source[3], quadColor[3], alpha); /* A */ +         } +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_DST_ALPHA: +         VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +         /* multiply alpha by 1.0 */ +         VEC4_COPY(source[3], quadColor[3]); /* A */ +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         { +            float comp[4]; +            VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ +            VEC4_MUL(source[3], quadColor[3], comp); /* A */ +         } +         break; +      case PIPE_BLENDFACTOR_ZERO: +         VEC4_COPY(source[3], zero); /* A */ +         break; +      case PIPE_BLENDFACTOR_INV_SRC_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +         { +            float inv_alpha[4]; +            
VEC4_SUB(inv_alpha, one, quadColor[3]); +            VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_DST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_INV_DST_ALPHA: +         { +            float inv_alpha[4]; +            VEC4_SUB(inv_alpha, one, dest[3]); +            VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_CONST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +         { +            float inv_comp[4]; +            /* A */ +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); +            VEC4_MUL(source[3], quadColor[3], inv_comp); +         } +         break; +      default: +         assert(0); +      } + + +      /* +       * Compute dest/second term RGB +       */ +      switch (softpipe->blend->rgb_dst_factor) { +      case PIPE_BLENDFACTOR_ONE: +         /* dest = dest * 1   NO-OP, leave dest as-is */ +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ +         VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ +         VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ +         VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ +         VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ +         break; +      case PIPE_BLENDFACTOR_DST_ALPHA: +         VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ +         VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ +         VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ +         VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ +         VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +         assert(0); /* illegal */ +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         { +            float comp[4]; +            VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ +            VEC4_MUL(dest[0], dest[0], comp); /* R */ +            VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ +            VEC4_MUL(dest[1], dest[1], comp); /* G */ +            VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ +            VEC4_MUL(dest[2], dest[2], comp); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         { +            float comp[4]; +            VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ +            VEC4_MUL(dest[0], dest[0], comp); /* R */ +            VEC4_MUL(dest[1], dest[1], comp); /* G */ +            VEC4_MUL(dest[2], dest[2], comp); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_ZERO: +         VEC4_COPY(dest[0], zero); /* R */ +         VEC4_COPY(dest[1], zero); /* G */ +         VEC4_COPY(dest[2], zero); /* B */ +         break; +      case PIPE_BLENDFACTOR_SRC1_COLOR: +      case PIPE_BLENDFACTOR_SRC1_ALPHA: +         /* XXX what are these? 
*/ +         assert(0); +         break; +      case PIPE_BLENDFACTOR_INV_SRC_COLOR: +         { +            float inv_comp[4]; +            VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ +            VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ +            VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ +            VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ +            VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ +            VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +         { +            float one_minus_alpha[QUAD_SIZE]; +            VEC4_SUB(one_minus_alpha, one, quadColor[3]); +            VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ +            VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ +            VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_DST_ALPHA: +         { +            float inv_comp[4]; +            VEC4_SUB(inv_comp, one, dest[3]); /* A */ +            VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ +            VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ +            VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_DST_COLOR: +         { +            float inv_comp[4]; +            VEC4_SUB(inv_comp, one, dest[0]); /* R */ +            VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ +            VEC4_SUB(inv_comp, one, dest[1]); /* G */ +            VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ +            VEC4_SUB(inv_comp, one, dest[2]); /* B */ +            VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_CONST_COLOR: +         { +            float inv_comp[4]; +            /* R */ +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); +            VEC4_MUL(dest[0], dest[0], inv_comp); +            /* G */ +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); +            VEC4_MUL(dest[1], dest[1], inv_comp); +            /* B */ +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); +            VEC4_MUL(dest[2], dest[2], inv_comp); +         } +         break; +      case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +         { +            float inv_comp[4]; +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); +            VEC4_MUL(dest[0], dest[0], inv_comp); +            VEC4_MUL(dest[1], dest[1], inv_comp); +            VEC4_MUL(dest[2], dest[2], inv_comp); +         } +         break; +      case PIPE_BLENDFACTOR_INV_SRC1_COLOR: +      case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: +         /* XXX what are these? 
*/ +         assert(0); +         break; +      default: +         assert(0); +      } + +      /* +       * Compute dest/second term A +       */ +      switch (softpipe->blend->alpha_dst_factor) { +      case PIPE_BLENDFACTOR_ONE: +         /* dest = dest * 1   NO-OP, leave dest as-is */ +         break; +      case PIPE_BLENDFACTOR_SRC_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_SRC_ALPHA: +         VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ +         break; +      case PIPE_BLENDFACTOR_DST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_DST_ALPHA: +         VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ +         break; +      case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: +         assert(0); /* illegal */ +         break; +      case PIPE_BLENDFACTOR_CONST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_CONST_ALPHA: +         { +            float comp[4]; +            VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ +            VEC4_MUL(dest[3], dest[3], comp); /* A */ +         } +         break; +      case PIPE_BLENDFACTOR_ZERO: +         VEC4_COPY(dest[3], zero); /* A */ +         break; +      case PIPE_BLENDFACTOR_INV_SRC_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_INV_SRC_ALPHA: +         { +            float one_minus_alpha[QUAD_SIZE]; +            VEC4_SUB(one_minus_alpha, one, quadColor[3]); +            VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_DST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_INV_DST_ALPHA: +         { +            float inv_comp[4]; +            VEC4_SUB(inv_comp, one, dest[3]); /* A */ +            VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ +         } +         break; +      case PIPE_BLENDFACTOR_INV_CONST_COLOR: +         /* fall-through */ +      case PIPE_BLENDFACTOR_INV_CONST_ALPHA: +         { +            float inv_comp[4]; +            VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); +            VEC4_MUL(dest[3], dest[3], inv_comp); +         } +         break; +      default: +         assert(0); +      } + +      /* +       * Combine RGB terms +       */ +      switch (softpipe->blend->rgb_func) { +      case PIPE_BLEND_ADD: +         VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ +         VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ +         VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ +         break; +      case PIPE_BLEND_SUBTRACT: +         VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ +         VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ +         VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ +         break; +      case PIPE_BLEND_REVERSE_SUBTRACT: +         VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ +         VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ +         VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ +         break; +      case PIPE_BLEND_MIN: +         VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ +         VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ +         VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ +         break; +      case PIPE_BLEND_MAX: +         VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ +         VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ +         VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ +         break; +      default: +         assert(0); +      } + +      /* +       * Combine A terms +       */ +      
switch (softpipe->blend->alpha_func) { +      case PIPE_BLEND_ADD: +         VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ +         break; +      case PIPE_BLEND_SUBTRACT: +         VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ +         break; +      case PIPE_BLEND_REVERSE_SUBTRACT: +         VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ +         break; +      case PIPE_BLEND_MIN: +         VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ +         break; +      case PIPE_BLEND_MAX: +         VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ +         break; +      default: +         assert(0); +      } + +   } /* cbuf loop */ + +   /* pass blended quad to next stage */ +   qs->next->run(qs->next, quad); +} + + +static void blend_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void blend_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = blend_begin; +   stage->run = blend_quad; +   stage->destroy = blend_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c new file mode 100644 index 0000000000..d7d6a6974d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -0,0 +1,74 @@ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + + +/** + * Loop over colorbuffers, passing quad to next stage each time. + */ +static void +cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   struct softpipe_context *softpipe = qs->softpipe; +   float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE]; +   unsigned i; + +   assert(sizeof(quad->outputs.color) == sizeof(tmp)); +   assert(softpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); + +   /* make copy of original colors since they can get modified +    * by blending and masking. +    * XXX we won't have to do this if the fragment program actually emits +    * N separate colors and we're drawing to N color buffers (MRT). +    * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is +    * in effect, we need to save/restore colors like this. +    */ +   memcpy(tmp, quad->outputs.color, sizeof(tmp)); + +   for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { +      /* set current cbuffer */ +#if 0 /* obsolete & going away */ +      softpipe->current_cbuf = i; +#endif + +      /* pass blended quad to next stage */ +      qs->next->run(qs->next, quad); + +      /* restore quad's colors for next buffer */ +      memcpy(quad->outputs.color, tmp, sizeof(tmp)); +   } +} + + +static void cbuf_loop_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void cbuf_loop_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +/** + * Create the colorbuffer loop stage. + * This is used to implement multiple render targets and GL_FRONT_AND_BACK + * rendering. 
+ */ +struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = cbuf_loop_begin; +   stage->run = cbuf_loop_quad; +   stage->destroy = cbuf_loop_destroy; + +   return stage; +} + diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c new file mode 100644 index 0000000000..563c2fc739 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -0,0 +1,116 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \brief  quad colormask stage + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + + +/** + * XXX colormask could be rolled into blending... 
+ */ +static void +colormask_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   struct softpipe_context *softpipe = qs->softpipe; +   uint cbuf; + +   /* loop over colorbuffer outputs */ +   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { +      float dest[4][QUAD_SIZE]; +      struct softpipe_cached_tile *tile +         = sp_get_cached_tile(softpipe, +                              softpipe->cbuf_cache[cbuf], +                              quad->input.x0, quad->input.y0); +      float (*quadColor)[4] = quad->output.color[cbuf]; +      uint i, j; + +      /* get/swizzle dest colors */ +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); +         int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); +         for (i = 0; i < 4; i++) { +            dest[i][j] = tile->data.color[y][x][i]; +         } +      } + +      /* R */ +      if (!(softpipe->blend->colormask & PIPE_MASK_R)) +          COPY_4V(quadColor[0], dest[0]); + +      /* G */ +      if (!(softpipe->blend->colormask & PIPE_MASK_G)) +          COPY_4V(quadColor[1], dest[1]); + +      /* B */ +      if (!(softpipe->blend->colormask & PIPE_MASK_B)) +          COPY_4V(quadColor[2], dest[2]); + +      /* A */ +      if (!(softpipe->blend->colormask & PIPE_MASK_A)) +          COPY_4V(quadColor[3], dest[3]); +   } + +   /* pass quad to next stage */ +   qs->next->run(qs->next, quad); +} + + +static void colormask_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void colormask_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = colormask_begin; +   stage->run = colormask_quad; +   stage->destroy = colormask_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c new file mode 100644 index 0000000000..c27fd1482d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -0,0 +1,93 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * 
+ **************************************************************************/
+
+
+/**
+ * \brief  Apply AA coverage to quad alpha values
+ * \author  Brian Paul
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_context.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+
+
+/**
+ * Multiply quad's alpha values by the fragment coverage.
+ */
+static void
+coverage_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+   struct softpipe_context *softpipe = qs->softpipe;
+
+   if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) ||
+       (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) ||
+       (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) {
+      uint cbuf;
+
+      /* loop over colorbuffer outputs */
+      for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
+         float (*quadColor)[4] = quad->output.color[cbuf];
+         unsigned j;
+         for (j = 0; j < QUAD_SIZE; j++) {
+            assert(quad->input.coverage[j] >= 0.0);
+            assert(quad->input.coverage[j] <= 1.0);
+            quadColor[3][j] *= quad->input.coverage[j];
+         }
+      }
+   }
+
+   qs->next->run(qs->next, quad);
+}
+
+
+static void coverage_begin(struct quad_stage *qs)
+{
+   qs->next->begin(qs->next);
+}
+
+
+static void coverage_destroy(struct quad_stage *qs)
+{
+   FREE( qs );
+}
+
+
+struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe )
+{
+   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+   stage->softpipe = softpipe;
+   stage->begin = coverage_begin;
+   stage->run = coverage_quad;
+   stage->destroy = coverage_destroy;
+
+   return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
new file mode 100644
index 0000000000..523bd3e080
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -0,0 +1,290 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *  + **************************************************************************/ + +/** + * \brief  Quad depth testing + */ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Do depth testing for a quad. + * Not static since it's used by the stencil code. + */ + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... + */ + +void +sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   struct softpipe_context *softpipe = qs->softpipe; +   struct pipe_surface *ps = softpipe->framebuffer.zsbuf; +   const enum pipe_format format = ps->format; +   unsigned bzzzz[QUAD_SIZE];  /**< Z values fetched from depth buffer */ +   unsigned qzzzz[QUAD_SIZE];  /**< Z values from the quad */ +   unsigned zmask = 0; +   unsigned j; +   struct softpipe_cached_tile *tile +      = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + +   assert(ps); /* shouldn't get here if there's no zbuffer */ + +   /* +    * Convert quad's float depth values to int depth values (qzzzz). +    * If the Z buffer stores integer values, we _have_ to do the depth +    * compares with integers (not floats).  Otherwise, the float->int->float +    * conversion of Z values (which isn't an identity function) will cause +    * Z-fighting errors. +    * +    * Also, get the zbuffer values (bzzzz) from the cached tile. +    */ +   switch (format) { +   case PIPE_FORMAT_Z16_UNORM: +      { +         float scale = 65535.0; + +         for (j = 0; j < QUAD_SIZE; j++) { +            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); +         } + +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            bzzzz[j] = tile->data.depth16[y][x]; +         } +      } +      break; +   case PIPE_FORMAT_Z32_UNORM: +      { +         double scale = (double) (uint) ~0UL; + +         for (j = 0; j < QUAD_SIZE; j++) { +            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); +         } + +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            bzzzz[j] = tile->data.depth32[y][x]; +         } +      } +      break; +   case PIPE_FORMAT_X8Z24_UNORM: +      /* fall-through */ +   case PIPE_FORMAT_S8Z24_UNORM: +      { +         float scale = (float) ((1 << 24) - 1); + +         for (j = 0; j < QUAD_SIZE; j++) { +            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); +         } + +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; +         } +      } +      break; +   case PIPE_FORMAT_Z24X8_UNORM: +      /* fall-through */ +   case PIPE_FORMAT_Z24S8_UNORM: +      { +         float scale = (float) ((1 << 24) - 1); + +         for (j = 0; j < QUAD_SIZE; j++) { +            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); +         } + +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 
1); +            bzzzz[j] = tile->data.depth32[y][x] >> 8; +         } +      } +      break; +   default: +      assert(0); +   } + +   switch (softpipe->depth_stencil->depth.func) { +   case PIPE_FUNC_NEVER: +      /* zmask = 0 */ +      break; +   case PIPE_FUNC_LESS: +      /* Note this is pretty much a single sse or cell instruction.   +       * Like this:  quad->mask &= (quad->outputs.depth < zzzz); +       */ +      for (j = 0; j < QUAD_SIZE; j++) { +	 if (qzzzz[j] < bzzzz[j])  +	    zmask |= 1 << j; +      } +      break; +   case PIPE_FUNC_EQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +	 if (qzzzz[j] == bzzzz[j])  +	    zmask |= 1 << j; +      } +      break; +   case PIPE_FUNC_LEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +	 if (qzzzz[j] <= bzzzz[j])  +	    zmask |= (1 << j); +      } +      break; +   case PIPE_FUNC_GREATER: +      for (j = 0; j < QUAD_SIZE; j++) { +	 if (qzzzz[j] > bzzzz[j])  +	    zmask |= (1 << j); +      } +      break; +   case PIPE_FUNC_NOTEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +	 if (qzzzz[j] != bzzzz[j])  +	    zmask |= (1 << j); +      } +      break; +   case PIPE_FUNC_GEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +	 if (qzzzz[j] >= bzzzz[j])  +	    zmask |= (1 << j); +      } +      break; +   case PIPE_FUNC_ALWAYS: +      zmask = MASK_ALL; +      break; +   default: +      assert(0); +   } + +   quad->inout.mask &= zmask; + +   if (softpipe->depth_stencil->depth.writemask) { +       +      /* This is also efficient with sse / spe instructions:  +       */ +      for (j = 0; j < QUAD_SIZE; j++) { +	 if (quad->inout.mask & (1 << j)) { +	    bzzzz[j] = qzzzz[j]; +	 } +      } + +      /* put updated Z values back into cached tile */ +      switch (format) { +      case PIPE_FORMAT_Z16_UNORM: +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            tile->data.depth16[y][x] = (ushort) bzzzz[j]; +         } +         break; +      case PIPE_FORMAT_X8Z24_UNORM: +         /* fall-through */ +         /* (yes, this falls through to a different case than above) */ +      case PIPE_FORMAT_Z32_UNORM: +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            tile->data.depth32[y][x] = bzzzz[j]; +         } +         break; +      case PIPE_FORMAT_S8Z24_UNORM: +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            uint s8z24 = tile->data.depth32[y][x]; +            s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; +            tile->data.depth32[y][x] = s8z24; +         } +         break; +      case PIPE_FORMAT_Z24S8_UNORM: +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            uint z24s8 = tile->data.depth32[y][x]; +            z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); +            tile->data.depth32[y][x] = z24s8; +         } +         break; +      case PIPE_FORMAT_Z24X8_UNORM: +         for (j = 0; j < QUAD_SIZE; j++) { +            int x = quad->input.x0 % TILE_SIZE + (j & 1); +            int y = quad->input.y0 % TILE_SIZE + (j >> 1); +            tile->data.depth32[y][x] = bzzzz[j] << 8; +         } +         break; +      default: +         assert(0); +      } +   } +} + + 
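As an aside for readers (not part of this commit): the format switch above quantizes each fragment's float Z to the integer precision of the bound depth buffer before comparing, which is what the comment at the top of sp_depth_test_quad means by avoiding float->int->float Z-fighting. The tiny standalone C sketch below only illustrates the Z16 case; the helper name z16_from_float and the sample depth values are invented for the example, while the scale factor matches the PIPE_FORMAT_Z16_UNORM path above.

/* Illustrative sketch only -- not from sp_quad_depth_test.c.
 * Two depths that differ as floats can quantize to the same 16-bit
 * value; once quantized, a PIPE_FUNC_LESS-style comparison correctly
 * treats them as equal instead of letting rounding pick a winner.
 */
#include <stdio.h>

static unsigned
z16_from_float(float z)                  /* hypothetical helper */
{
   return (unsigned) (z * 65535.0f);     /* same scale as the Z16 case above */
}

int
main(void)
{
   const float a = 0.50001f, b = 0.50002f;  /* distinct as floats */
   const unsigned qa = z16_from_float(a);
   const unsigned qb = z16_from_float(b);

   /* with these values both quantize to 32768, so neither passes a
    * "less than" test against the other -- no Z-fighting from rounding
    */
   printf("qa=%u qb=%u qa<qb=%d qb<qa=%d\n", qa, qb, qa < qb, qb < qa);
   return 0;
}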
+static void +depth_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   sp_depth_test_quad(qs, quad); + +   if (quad->inout.mask) +      qs->next->run(qs->next, quad); +} + + +static void depth_test_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void depth_test_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = depth_test_begin; +   stage->run = depth_test_quad; +   stage->destroy = depth_test_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c new file mode 100644 index 0000000000..6e2dde304e --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -0,0 +1,88 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * \brief  Quad early-z testing + */ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_headers.h" +#include "sp_quad.h" + + +/** + * All this stage does is compute the quad's Z values (which is normally + * done by the shading stage). + * The next stage will do the actual depth test. 
+ */ +static void +earlyz_quad( +   struct quad_stage    *qs, +   struct quad_header   *quad ) +{ +   const float fx = (float) quad->input.x0; +   const float fy = (float) quad->input.y0; +   const float dzdx = quad->posCoef->dadx[2]; +   const float dzdy = quad->posCoef->dady[2]; +   const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + +   quad->output.depth[0] = z0; +   quad->output.depth[1] = z0 + dzdx; +   quad->output.depth[2] = z0 + dzdy; +   quad->output.depth[3] = z0 + dzdx + dzdy; + +   qs->next->run( qs->next, quad ); +} + +static void +earlyz_begin( +   struct quad_stage *qs ) +{ +   qs->next->begin( qs->next ); +} + +static void +earlyz_destroy( +   struct quad_stage *qs ) +{ +   FREE( qs ); +} + +struct quad_stage * +sp_quad_earlyz_stage( +   struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); + +   stage->softpipe = softpipe; +   stage->begin = earlyz_begin; +   stage->run = earlyz_quad; +   stage->destroy = earlyz_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c new file mode 100644 index 0000000000..5dacbbe55f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -0,0 +1,189 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc.  All rights reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed.  We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. 
+ */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ +   struct quad_stage stage;  /**< base class */ +   struct tgsi_exec_machine machine; +   struct tgsi_exec_vector *inputs, *outputs; +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ +   return (struct quad_shade_stage *) qs; +} + + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( +   struct quad_stage *qs, +   struct quad_header *quad ) +{ +   struct quad_shade_stage *qss = quad_shade_stage( qs ); +   struct softpipe_context *softpipe = qs->softpipe; +   struct tgsi_exec_machine *machine = &qss->machine; +   boolean z_written; +    +   /* Consts do not require 16 byte alignment. */ +   machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + +   machine->InterpCoefs = quad->coef; + +   /* run shader */ +   quad->inout.mask &= softpipe->fs->run( softpipe->fs,  +				    &qss->machine, +				    quad ); + +   /* store outputs */ +   z_written = FALSE; +   { +      const ubyte *sem_name = softpipe->fs->info.output_semantic_name; +      const ubyte *sem_index = softpipe->fs->info.output_semantic_index; +      const uint n = qss->stage.softpipe->fs->info.num_outputs; +      uint i; +      for (i = 0; i < n; i++) { +         switch (sem_name[i]) { +         case TGSI_SEMANTIC_COLOR: +            { +               uint cbuf = sem_index[i]; +               memcpy(quad->output.color[cbuf], +                      &machine->Outputs[i].xyzw[0].f[0], +                      sizeof(quad->output.color[0]) ); +            } +            break; +         case TGSI_SEMANTIC_POSITION: +            { +               uint j; +               for (j = 0; j < 4; j++) { +                  quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; +               } +               z_written = TRUE; +            } +            break; +         } +      } +   } + +   if (!z_written) { +      /* compute Z values now, as in the quad earlyz stage */ +      /* XXX we should really only do this if the earlyz stage is not used */ +      const float fx = (float) quad->input.x0; +      const float fy = (float) quad->input.y0; +      const float dzdx = quad->posCoef->dadx[2]; +      const float dzdy = quad->posCoef->dady[2]; +      const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + +      quad->output.depth[0] = z0; +      quad->output.depth[1] = z0 + dzdx; +      quad->output.depth[2] = z0 + dzdy; +      quad->output.depth[3] = z0 + dzdx + dzdy; +   } + +   /* shader may cull fragments */ +   if( quad->inout.mask ) { +      qs->next->run( qs->next, quad ); +   } +} + +/** + * Per-primitive (or per-begin?) 
setup + */ +static void shade_begin(struct quad_stage *qs) +{ +   struct quad_shade_stage *qss = quad_shade_stage(qs); +   struct softpipe_context *softpipe = qs->softpipe; + +   softpipe->fs->prepare( softpipe->fs,  +			  &qss->machine, +			  (struct tgsi_sampler **) +                             softpipe->tgsi.frag_samplers_list ); + +   qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ +   struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + +   tgsi_exec_machine_free_data(&qss->machine); +   FREE( qss->inputs ); +   FREE( qss->outputs ); +   FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ +   struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + +   /* allocate storage for program inputs/outputs, aligned to 16 bytes */ +   qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); +   qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16); +   qss->machine.Inputs = align16(qss->inputs); +   qss->machine.Outputs = align16(qss->outputs); + +   qss->stage.softpipe = softpipe; +   qss->stage.begin = shade_begin; +   qss->stage.run = shade_quad; +   qss->stage.destroy = shade_destroy; + +   tgsi_exec_machine_init( &qss->machine ); + +   return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c new file mode 100644 index 0000000000..169bd82876 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -0,0 +1,85 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +/** + * \brief  Quad occlusion counter stage + * \author  Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + +static unsigned count_bits( unsigned val ) +{ +   unsigned i; + +   for (i = 0; val ; val >>= 1) +      i += (val & 1); + +   return i; +} + +static void +occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   struct softpipe_context *softpipe = qs->softpipe; + +   softpipe->occlusion_count += count_bits(quad->inout.mask); + +   qs->next->run(qs->next, quad); +} + + +static void occlusion_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void occlusion_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = occlusion_begin; +   stage->run = occlusion_count_quad; +   stage->destroy = occlusion_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c new file mode 100644 index 0000000000..a37c8b4c39 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -0,0 +1,103 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Last step of quad processing: write quad colors to the framebuffer, + * taking mask into account. 
+ */ +static void +output_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   /* in-tile pos: */ +   const int itx = quad->input.x0 % TILE_SIZE; +   const int ity = quad->input.y0 % TILE_SIZE; + +   struct softpipe_context *softpipe = qs->softpipe; +   uint cbuf; + +   /* loop over colorbuffer outputs */ +   for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { +      struct softpipe_cached_tile *tile +         = sp_get_cached_tile(softpipe, +                              softpipe->cbuf_cache[cbuf], +                              quad->input.x0, quad->input.y0); +      float (*quadColor)[4] = quad->output.color[cbuf]; +      int i, j; + +      /* get/swizzle dest colors */ +      for (j = 0; j < QUAD_SIZE; j++) { +         if (quad->inout.mask & (1 << j)) { +            int x = itx + (j & 1); +            int y = ity + (j >> 1); +            for (i = 0; i < 4; i++) { /* loop over color chans */ +               tile->data.color[y][x][i] = quadColor[i][j]; +            } +            if (0) { +               debug_printf("sp write pixel %d,%d: %g, %g, %g\n", +                            quad->input.x0 + x, +                            quad->input.y0 + y, +                            quadColor[0][j], +                            quadColor[1][j], +                            quadColor[2][j]); +            } +         } +      } +   } +} + + +static void output_begin(struct quad_stage *qs) +{ +   assert(qs->next == NULL); +} + + +static void output_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = output_begin; +   stage->run = output_quad; +   stage->destroy = output_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c new file mode 100644 index 0000000000..7495515764 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -0,0 +1,352 @@ + +/** + * \brief Quad stencil testing + */ + + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. + * + * \param stencilVals  the stencil values from the stencil buffer + * \param func  the stencil func (PIPE_FUNC_x) + * \param ref  the stencil reference value + * \param valMask  the stencil value mask indicating which bits of the stencil + *                 values and ref value are to be used. 
+ * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, +                unsigned ref, unsigned valMask) +{ +   unsigned passMask = 0x0; +   unsigned j; + +   ref &= valMask; + +   switch (func) { +   case PIPE_FUNC_NEVER: +      /* passMask = 0x0 */ +      break; +   case PIPE_FUNC_LESS: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (ref < (stencilVals[j] & valMask)) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_EQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (ref == (stencilVals[j] & valMask)) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_LEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (ref <= (stencilVals[j] & valMask)) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_GREATER: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (ref > (stencilVals[j] & valMask)) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_NOTEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (ref != (stencilVals[j] & valMask)) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_GEQUAL: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (ref >= (stencilVals[j] & valMask)) { +            passMask |= (1 << j); +         } +      } +      break; +   case PIPE_FUNC_ALWAYS: +      passMask = MASK_ALL; +      break; +   default: +      assert(0); +   } + +   return passMask; +} + + +/** + * Apply the stencil operator to stencil values. + * + * \param stencilVals  the stencil buffer values (read and written) + * \param mask  indicates which pixels to update + * \param op  the stencil operator (PIPE_STENCIL_OP_x) + * \param ref  the stencil reference value + * \param wrtMask  writemask controlling which bits are changed in the + *                 stencil values + */ +static void +apply_stencil_op(ubyte stencilVals[QUAD_SIZE], +                 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ +   unsigned j; +   ubyte newstencil[QUAD_SIZE]; + +   for (j = 0; j < QUAD_SIZE; j++) { +      newstencil[j] = stencilVals[j]; +   } + +   switch (op) { +   case PIPE_STENCIL_OP_KEEP: +      /* no-op */ +      break; +   case PIPE_STENCIL_OP_ZERO: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (mask & (1 << j)) { +            newstencil[j] = 0; +         } +      } +      break; +   case PIPE_STENCIL_OP_REPLACE: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (mask & (1 << j)) { +            newstencil[j] = ref; +         } +      } +      break; +   case PIPE_STENCIL_OP_INCR: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (mask & (1 << j)) { +            if (stencilVals[j] < STENCIL_MAX) { +               newstencil[j] = stencilVals[j] + 1; +            } +         } +      } +      break; +   case PIPE_STENCIL_OP_DECR: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (mask & (1 << j)) { +            if (stencilVals[j] > 0) { +               newstencil[j] = stencilVals[j] - 1; +            } +         } +      } +      break; +   case PIPE_STENCIL_OP_INCR_WRAP: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (mask & (1 << j)) { +            newstencil[j] = stencilVals[j] + 1; +         } +      } +      break; +   case PIPE_STENCIL_OP_DECR_WRAP: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (mask & (1 << j)) { +            
newstencil[j] = stencilVals[j] - 1; +         } +      } +      break; +   case PIPE_STENCIL_OP_INVERT: +      for (j = 0; j < QUAD_SIZE; j++) { +         if (mask & (1 << j)) { +            newstencil[j] = ~stencilVals[j]; +         } +      } +      break; +   default: +      assert(0); +   } + +   /* +    * update the stencil values +    */ +   if (wrtMask != STENCIL_MAX) { +      /* apply bit-wise stencil buffer writemask */ +      for (j = 0; j < QUAD_SIZE; j++) { +         stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); +      } +   } +   else { +      for (j = 0; j < QUAD_SIZE; j++) { +         stencilVals[j] = newstencil[j]; +      } +   } +} + + +/** + * Do stencil (and depth) testing.  Stenciling depends on the outcome of + * depth testing. + */ +static void +stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   struct softpipe_context *softpipe = qs->softpipe; +   struct pipe_surface *ps = softpipe->framebuffer.zsbuf; +   unsigned func, zFailOp, zPassOp, failOp; +   ubyte ref, wrtMask, valMask; +   ubyte stencilVals[QUAD_SIZE]; +   struct softpipe_cached_tile *tile +      = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); +   uint j; +   uint face = quad->input.facing; + +   if (!softpipe->depth_stencil->stencil[1].enabled) { +      /* single-sided stencil test, use front (face=0) state */ +      face = 0; +   } + +   /* choose front or back face function, operator, etc */ +   /* XXX we could do these initializations once per primitive */ +   func    = softpipe->depth_stencil->stencil[face].func; +   failOp  = softpipe->depth_stencil->stencil[face].fail_op; +   zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; +   zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; +   ref     = softpipe->depth_stencil->stencil[face].ref_value; +   wrtMask = softpipe->depth_stencil->stencil[face].writemask; +   valMask = softpipe->depth_stencil->stencil[face].valuemask; + +   assert(ps); /* shouldn't get here if there's no stencil buffer */ + +   /* get stencil values from cached tile */ +   switch (ps->format) { +   case PIPE_FORMAT_S8Z24_UNORM: +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = quad->input.x0 % TILE_SIZE + (j & 1); +         int y = quad->input.y0 % TILE_SIZE + (j >> 1); +         stencilVals[j] = tile->data.depth32[y][x] >> 24; +      } +      break; +   case PIPE_FORMAT_Z24S8_UNORM: +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = quad->input.x0 % TILE_SIZE + (j & 1); +         int y = quad->input.y0 % TILE_SIZE + (j >> 1); +         stencilVals[j] = tile->data.depth32[y][x] & 0xff; +      } +      break; +   case PIPE_FORMAT_S8_UNORM: +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = quad->input.x0 % TILE_SIZE + (j & 1); +         int y = quad->input.y0 % TILE_SIZE + (j >> 1); +         stencilVals[j] = tile->data.stencil8[y][x]; +      } +      break; +   default: +      assert(0); +   } + +   /* do the stencil test first */ +   { +      unsigned passMask, failMask; +      passMask = do_stencil_test(stencilVals, func, ref, valMask); +      failMask = quad->inout.mask & ~passMask; +      quad->inout.mask &= passMask; + +      if (failOp != PIPE_STENCIL_OP_KEEP) { +         apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); +      } +   } + +   if (quad->inout.mask) { +      /* now the pixels that passed the stencil test are depth tested */ +      if (softpipe->depth_stencil->depth.enabled) { +         const unsigned origMask = 
quad->inout.mask; + +         sp_depth_test_quad(qs, quad);  /* quad->mask is updated */ + +         /* update stencil buffer values according to z pass/fail result */ +         if (zFailOp != PIPE_STENCIL_OP_KEEP) { +            const unsigned failMask = origMask & ~quad->inout.mask; +            apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); +         } + +         if (zPassOp != PIPE_STENCIL_OP_KEEP) { +            const unsigned passMask = origMask & quad->inout.mask; +            apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); +         } +      } +      else { +         /* no depth test, apply Zpass operator to stencil buffer values */ +         apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); +      } + +   } + +   /* put new stencil values into cached tile */ +   switch (ps->format) { +   case PIPE_FORMAT_S8Z24_UNORM: +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = quad->input.x0 % TILE_SIZE + (j & 1); +         int y = quad->input.y0 % TILE_SIZE + (j >> 1); +         uint s8z24 = tile->data.depth32[y][x]; +         s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); +         tile->data.depth32[y][x] = s8z24; +      } +      break; +   case PIPE_FORMAT_Z24S8_UNORM: +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = quad->input.x0 % TILE_SIZE + (j & 1); +         int y = quad->input.y0 % TILE_SIZE + (j >> 1); +         uint z24s8 = tile->data.depth32[y][x]; +         z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; +         tile->data.depth32[y][x] = z24s8; +      } +      break; +   case PIPE_FORMAT_S8_UNORM: +      for (j = 0; j < QUAD_SIZE; j++) { +         int x = quad->input.x0 % TILE_SIZE + (j & 1); +         int y = quad->input.y0 % TILE_SIZE + (j >> 1); +         tile->data.stencil8[y][x] = stencilVals[j]; +      } +      break; +   default: +      assert(0); +   } + +   if (quad->inout.mask) +      qs->next->run(qs->next, quad); +} + + +static void stencil_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void stencil_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = stencil_begin; +   stage->run = stencil_test_quad; +   stage->destroy = stencil_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c new file mode 100644 index 0000000000..ccf37f6be5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -0,0 +1,94 @@ + +/** + * quad polygon stipple stage + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +/** + * Apply polygon stipple to quads produced by triangle rasterization + */ +static void +stipple_quad(struct quad_stage *qs, struct quad_header *quad) +{ +   static const uint bit31 = 1 << 31; +   static const uint bit30 = 1 << 30; + +   if (quad->input.prim == PRIM_TRI) { +      struct softpipe_context *softpipe = qs->softpipe; +      /* need to invert Y to index into OpenGL's stipple pattern */ +      int y0, y1; +      uint stipple0, stipple1; +      if (softpipe->rasterizer->origin_lower_left) { +         y0 = softpipe->framebuffer.height - 1 - quad->input.y0; +         y1 = y0 - 1; +      } +      else { +         y0 = quad->input.y0; +         y1 = y0 + 1; +      } +      
stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; +      stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + +#if 1 +      { +      const int col0 = quad->input.x0 % 32; +      if ((stipple0 & (bit31 >> col0)) == 0) +         quad->inout.mask &= ~MASK_TOP_LEFT; + +      if ((stipple0 & (bit30 >> col0)) == 0) +         quad->inout.mask &= ~MASK_TOP_RIGHT; + +      if ((stipple1 & (bit31 >> col0)) == 0) +         quad->inout.mask &= ~MASK_BOTTOM_LEFT; + +      if ((stipple1 & (bit30 >> col0)) == 0) +         quad->inout.mask &= ~MASK_BOTTOM_RIGHT; +      } +#else +      /* We'd like to use this code, but we'd need to redefine +       * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0), +       * and similarly for the BOTTOM bits.  But that may have undesirable +       * side effects elsewhere. +       */ +      const int col0 = 30 - (quad->input.x0 % 32); +      quad->inout.mask &= (((stipple0 >> col0) & 0x3) |  +                     (((stipple1 >> col0) & 0x3) << 2)); +#endif +      if (!quad->inout.mask) +         return; +   } + +   qs->next->run(qs->next, quad); +} + + +static void stipple_begin(struct quad_stage *qs) +{ +   qs->next->begin(qs->next); +} + + +static void stipple_destroy(struct quad_stage *qs) +{ +   FREE( qs ); +} + + +struct quad_stage * +sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ) +{ +   struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + +   stage->softpipe = softpipe; +   stage->begin = stipple_begin; +   stage->run = stipple_quad; +   stage->destroy = stipple_destroy; + +   return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c new file mode 100644 index 0000000000..b0d8e01426 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -0,0 +1,107 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Author: + *    Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_query.h" + +struct softpipe_query { +   uint64_t start; +   uint64_t end; +}; + + +static struct softpipe_query *softpipe_query( struct pipe_query *p ) +{ +   return (struct softpipe_query *)p; +} + +static struct pipe_query * +softpipe_create_query(struct pipe_context *pipe,  +		      unsigned type) +{ +   assert(type == PIPE_QUERY_OCCLUSION_COUNTER); +   return (struct pipe_query *)CALLOC_STRUCT( softpipe_query ); +} + + +static void +softpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ +   FREE(q); +} + + +static void +softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ +   struct softpipe_context *softpipe = softpipe_context( pipe ); +   struct softpipe_query *sq = softpipe_query(q); +    +   sq->start = softpipe->occlusion_count; +} + + +static void +softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ +   struct softpipe_context *softpipe = softpipe_context( pipe ); +   struct softpipe_query *sq = softpipe_query(q); + +   sq->end = softpipe->occlusion_count; +} + + +static boolean +softpipe_get_query_result(struct pipe_context *pipe,  +			  struct pipe_query *q, +			  boolean wait, +			  uint64_t *result ) +{ +   struct softpipe_query *sq = softpipe_query(q); +   *result = sq->end - sq->start; +   return TRUE; +} + + +void softpipe_init_query_funcs(struct softpipe_context *softpipe ) +{ +   softpipe->pipe.create_query = softpipe_create_query; +   softpipe->pipe.destroy_query = softpipe_destroy_query; +   softpipe->pipe.begin_query = softpipe_begin_query; +   softpipe->pipe.end_query = softpipe_end_query; +   softpipe->pipe.get_query_result = softpipe_get_query_result; +} + + diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h new file mode 100644 index 0000000000..05060a4575 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -0,0 +1,39 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Author: + *    Keith Whitwell + */ + +#ifndef SP_QUERY_H +#define SP_QUERY_H + +struct softpipe_context; +extern void softpipe_init_query_funcs(struct softpipe_context * ); + + +#endif /* SP_QUERY_H */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c new file mode 100644 index 0000000000..7380a6ae2b --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -0,0 +1,181 @@ +/************************************************************************** + *  + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_simple_screen.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_screen.h" + + +static const char * +softpipe_get_vendor(struct pipe_screen *screen) +{ +   return "Tungsten Graphics, Inc."; +} + + +static const char * +softpipe_get_name(struct pipe_screen *screen) +{ +   return "softpipe"; +} + + +static int +softpipe_get_param(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +      return PIPE_MAX_SAMPLERS; +   case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +      return PIPE_MAX_SAMPLERS; +   case PIPE_CAP_NPOT_TEXTURES: +      return 1; +   case PIPE_CAP_TWO_SIDED_STENCIL: +      return 1; +   case PIPE_CAP_GLSL: +      return 1; +   case PIPE_CAP_S3TC: +      return 0; +   case PIPE_CAP_ANISOTROPIC_FILTER: +      return 0; +   case PIPE_CAP_POINT_SPRITE: +      return 1; +   case PIPE_CAP_MAX_RENDER_TARGETS: +      return PIPE_MAX_COLOR_BUFS; +   case PIPE_CAP_OCCLUSION_QUERY: +      return 1; +   case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +      return 1; +   case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +      return 1; +   case PIPE_CAP_TEXTURE_SHADOW_MAP: +      return 1; +   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +      return 12; /* max 2Kx2K */ +   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +      return 8;  /* max 128x128x128 */ +   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +      return 12; /* max 2Kx2K */ +   default: +      return 0; +   } +} + + +static float +softpipe_get_paramf(struct pipe_screen *screen, int param) +{ +   switch (param) { +   case PIPE_CAP_MAX_LINE_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_LINE_WIDTH_AA: +      return 255.0; /* arbitrary */ +   case PIPE_CAP_MAX_POINT_WIDTH: +      /* fall-through */ +   case PIPE_CAP_MAX_POINT_WIDTH_AA: +      return 255.0; /* arbitrary */ +   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +      return 0.0; +   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +      return 16.0; /* arbitrary */ +   default: +      return 0; +   } +} + + +/** + * Query format support for creating a texture, drawing surface, etc. + * \param format  the format to test + * \param type  one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +softpipe_is_format_supported( struct pipe_screen *screen, +                              enum pipe_format format,  +                              enum pipe_texture_target target, +                              unsigned tex_usage,  +                              unsigned geom_flags ) +{ +   switch(format) { +   case PIPE_FORMAT_DXT1_RGB: +   case PIPE_FORMAT_DXT1_RGBA: +   case PIPE_FORMAT_DXT3_RGBA: +   case PIPE_FORMAT_DXT5_RGBA: +      return FALSE; +   default: +      return TRUE; +   } +} + + +static void +softpipe_destroy_screen( struct pipe_screen *screen ) +{ +   struct pipe_winsys *winsys = screen->winsys; + +   if(winsys->destroy) +      winsys->destroy(winsys); + +   FREE(screen); +} + + + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no softpipe_screen). 
+ */ +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *winsys) +{ +   struct softpipe_screen *screen = CALLOC_STRUCT(softpipe_screen); + +   if (!screen) +      return NULL; + +   screen->base.winsys = winsys; + +   screen->base.destroy = softpipe_destroy_screen; + +   screen->base.get_name = softpipe_get_name; +   screen->base.get_vendor = softpipe_get_vendor; +   screen->base.get_param = softpipe_get_param; +   screen->base.get_paramf = softpipe_get_paramf; +   screen->base.is_format_supported = softpipe_is_format_supported; + +   softpipe_init_screen_texture_funcs(&screen->base); +   u_simple_screen_init(&screen->base); + +   return &screen->base; +} diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h new file mode 100644 index 0000000000..3d4bfd3e84 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.h @@ -0,0 +1,58 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SCREEN_H +#define SP_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" + + + +struct softpipe_screen { +   struct pipe_screen base; + +   /* Increments whenever textures are modified.  Contexts can track +    * this. +    */ +   unsigned timestamp;           +}; + + + + +static INLINE struct softpipe_screen * +softpipe_screen( struct pipe_screen *pipe ) +{ +   return (struct softpipe_screen *)pipe; +} + + +#endif /* SP_SCREEN_H */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c new file mode 100644 index 0000000000..b1adb9cb7a --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -0,0 +1,1569 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief  Primitive rasterization/rendering (points, lines, triangles) + * + * \author  Keith Whitwell <keith@tungstengraphics.com> + * \author  Brian Paul + */ + +#include "sp_setup.h" + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +#define DEBUG_VERTS 0 +#define DEBUG_FRAGS 0 + +/** + * Triangle edge info + */ +struct edge { +   float dx;		/**< X(v1) - X(v0), used only during setup */ +   float dy;		/**< Y(v1) - Y(v0), used only during setup */ +   float dxdy;		/**< dx/dy */ +   float sx, sy;	/**< first sample point coord */ +   int lines;		/**< number of lines on this edge */ +}; + +#if SP_NUM_QUAD_THREADS > 1 + +/* Set to 1 if you want other threads to be instantly + * notified of pending jobs. + */ +#define INSTANT_NOTEMPTY_NOTIFY 0 + +struct thread_info +{ +   struct setup_context *setup; +   uint id; +   pipe_thread handle; +}; + +struct quad_job; + +typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); + +struct quad_job +{ +   struct quad_header_input input; +   struct quad_header_inout inout; +   quad_job_routine routine; +}; + +#define NUM_QUAD_JOBS 64 + +struct quad_job_que +{ +   struct quad_job jobs[NUM_QUAD_JOBS]; +   uint first; +   uint last; +   pipe_mutex que_mutex; +   pipe_condvar que_notfull_condvar; +   pipe_condvar que_notempty_condvar; +   uint jobs_added; +   uint jobs_done; +   pipe_condvar que_done_condvar; +}; + +static void +add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine ) +{ +#if INSTANT_NOTEMPTY_NOTIFY +   boolean empty; +#endif + +   /* Wait for empty slot, see if the que is empty. 
+    */ +   pipe_mutex_lock( que->que_mutex ); +   while ((que->last + 1) % NUM_QUAD_JOBS == que->first) { +#if !INSTANT_NOTEMPTY_NOTIFY +      pipe_condvar_broadcast( que->que_notempty_condvar ); +#endif +      pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex ); +   } +#if INSTANT_NOTEMPTY_NOTIFY +   empty = que->last == que->first; +#endif +   que->jobs_added++; +   pipe_mutex_unlock( que->que_mutex ); + +   /* Submit new job. +    */ +   que->jobs[que->last].input = quad->input; +   que->jobs[que->last].inout = quad->inout; +   que->jobs[que->last].routine = routine; +   que->last = (que->last + 1) % NUM_QUAD_JOBS; + +#if INSTANT_NOTEMPTY_NOTIFY +   /* If the que was empty, notify consumers there's a job to be done. +    */ +   if (empty) { +      pipe_mutex_lock( que->que_mutex ); +      pipe_condvar_broadcast( que->que_notempty_condvar ); +      pipe_mutex_unlock( que->que_mutex ); +   } +#endif +} + +#endif + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_context { +   struct softpipe_context *softpipe; + +   /* Vertices are just an array of floats making up each attribute in +    * turn.  Currently fixed at 4 floats, but should change in time. +    * Codegen will help cope with this. +    */ +   const float (*vmax)[4]; +   const float (*vmid)[4]; +   const float (*vmin)[4]; +   const float (*vprovoke)[4]; + +   struct edge ebot; +   struct edge etop; +   struct edge emaj; + +   float oneoverarea; + +   struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; +   struct tgsi_interp_coef posCoef;  /* For Z, W */ +   struct quad_header quad; + +#if SP_NUM_QUAD_THREADS > 1 +   struct quad_job_que que; +   struct thread_info threads[SP_NUM_QUAD_THREADS]; +#endif + +   struct { +      int left[2];   /**< [0] = row0, [1] = row1 */ +      int right[2]; +      int y; +      unsigned y_flags; +      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ +   } span; + +#if DEBUG_FRAGS +   uint numFragsEmitted;  /**< per primitive */ +   uint numFragsWritten;  /**< per primitive */ +#endif + +   unsigned winding;		/* which winding to cull */ +}; + +#if SP_NUM_QUAD_THREADS > 1 + +static PIPE_THREAD_ROUTINE( quad_thread, param ) +{ +   struct thread_info *info = (struct thread_info *) param; +   struct quad_job_que *que = &info->setup->que; + +   for (;;) { +      struct quad_job job; +      boolean full; + +      /* Wait for an available job. +       */ +      pipe_mutex_lock( que->que_mutex ); +      while (que->last == que->first) +         pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex ); + +      /* See if the que is full. +       */ +      full = (que->last + 1) % NUM_QUAD_JOBS == que->first; + +      /* Take a job and remove it from que. +       */ +      job = que->jobs[que->first]; +      que->first = (que->first + 1) % NUM_QUAD_JOBS; + +      /* Notify the producer if the que is not full. +       */ +      if (full) +         pipe_condvar_signal( que->que_notfull_condvar ); +      pipe_mutex_unlock( que->que_mutex ); + +      job.routine( info->setup, info->id, &job ); + +      /* Notify the producer if that's the last finished job. 
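       * (In other words, jobs_added and jobs_done act as a completion count:
       *  WAIT_FOR_COMPLETION() sleeps on que_done_condvar until the two
       *  counters match, i.e. until every submitted quad has been run.)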
+       */ +      pipe_mutex_lock( que->que_mutex ); +      que->jobs_done++; +      if (que->jobs_added == que->jobs_done) +         pipe_condvar_signal( que->que_done_condvar ); +      pipe_mutex_unlock( que->que_mutex ); +   } + +   return NULL; +} + +#define WAIT_FOR_COMPLETION(setup) \ +   do {\ +      pipe_mutex_lock( setup->que.que_mutex );\ +      if (!INSTANT_NOTEMPTY_NOTIFY)\ +         pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ +      while (setup->que.jobs_added != setup->que.jobs_done)\ +         pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ +      pipe_mutex_unlock( setup->que.que_mutex );\ +   } while (0) + +#else + +#define WAIT_FOR_COMPLETION(setup) ((void) 0) + +#endif + +/** + * Test if x is NaN or +/- infinity. + */ +static INLINE boolean +is_inf_or_nan(float x) +{ +   union fi tmp; +   tmp.f = x; +   return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); +} + + +static boolean cull_tri( struct setup_context *setup, +		      float det ) +{ +   if (det != 0)  +   {    +      /* if (det < 0 then Z points toward camera and triangle is  +       * counter-clockwise winding. +       */ +      unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; +       +      if ((winding & setup->winding) == 0)  +	 return FALSE; +   } + +   /* Culled: +    */ +   return TRUE; +} + + + +/** + * Clip setup->quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip( struct setup_context *setup, struct quad_header *quad ) +{ +   const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; +   const int minx = (int) cliprect->minx; +   const int maxx = (int) cliprect->maxx; +   const int miny = (int) cliprect->miny; +   const int maxy = (int) cliprect->maxy; + +   if (quad->input.x0 >= maxx || +       quad->input.y0 >= maxy || +       quad->input.x0 + 1 < minx || +       quad->input.y0 + 1 < miny) { +      /* totally clipped */ +      quad->inout.mask = 0x0; +      return; +   } +   if (quad->input.x0 < minx) +      quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); +   if (quad->input.y0 < miny) +      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); +   if (quad->input.x0 == maxx - 1) +      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); +   if (quad->input.y0 == maxy - 1) +      quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +{ +   quad_clip( setup, quad ); +   if (quad->inout.mask) { +      struct softpipe_context *sp = setup->softpipe; + +      sp->quad[thread].first->run( sp->quad[thread].first, quad ); +   } +} + +#if SP_NUM_QUAD_THREADS > 1 + +static void +clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ +   struct quad_header quad; + +   quad.input = job->input; +   quad.inout = job->inout; +   quad.coef = setup->quad.coef; +   quad.posCoef = setup->quad.posCoef; +   quad.nr_attrs = setup->quad.nr_attrs; +   clip_emit_quad( setup, &quad, thread ); +} + +#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job ) + +#else + +#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) + +#endif + +/** + * Emit a quad (pass to next stage).  No clipping is done. 
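 * (The quad's inout.mask carries one coverage bit per pixel of the 2x2
 *  block - MASK_TOP_LEFT/RIGHT for the even row, MASK_BOTTOM_LEFT/RIGHT
 *  for the odd row - so 0xf means the quad is fully covered.)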
+ */ +static INLINE void +emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +{ +   struct softpipe_context *sp = setup->softpipe; +#if DEBUG_FRAGS +   uint mask = quad->inout.mask; +#endif + +#if DEBUG_FRAGS +   if (mask & 1) setup->numFragsEmitted++; +   if (mask & 2) setup->numFragsEmitted++; +   if (mask & 4) setup->numFragsEmitted++; +   if (mask & 8) setup->numFragsEmitted++; +#endif +   sp->quad[thread].first->run( sp->quad[thread].first, quad ); +#if DEBUG_FRAGS +   mask = quad->inout.mask; +   if (mask & 1) setup->numFragsWritten++; +   if (mask & 2) setup->numFragsWritten++; +   if (mask & 4) setup->numFragsWritten++; +   if (mask & 8) setup->numFragsWritten++; +#endif +} + +#if SP_NUM_QUAD_THREADS > 1 + +static void +emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ +   struct quad_header quad; + +   quad.input = job->input; +   quad.inout = job->inout; +   quad.coef = setup->quad.coef; +   quad.posCoef = setup->quad.posCoef; +   quad.nr_attrs = setup->quad.nr_attrs; +   emit_quad( setup, &quad, thread ); +} + +#define EMIT_QUAD(setup,x,y,mask) do {\ +      setup->quad.input.x0 = x;\ +      setup->quad.input.y0 = y;\ +      setup->quad.inout.mask = mask;\ +      add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ +   } while (0) + +#else + +#define EMIT_QUAD(setup,x,y,mask) do {\ +      setup->quad.input.x0 = x;\ +      setup->quad.input.y0 = y;\ +      setup->quad.inout.mask = mask;\ +      emit_quad( setup, &setup->quad, 0 );\ +   } while (0) + +#endif + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ +   return x & ~1; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( struct setup_context *setup ) +{ +   const int xleft0 = setup->span.left[0]; +   const int xleft1 = setup->span.left[1]; +   const int xright0 = setup->span.right[0]; +   const int xright1 = setup->span.right[1]; +   int minleft, maxright; +   int x; + +   switch (setup->span.y_flags) { +   case 0x3: +      /* both odd and even lines written (both quad rows) */ +      minleft = block(MIN2(xleft0, xleft1)); +      maxright = block(MAX2(xright0, xright1)); +      for (x = minleft; x <= maxright; x += 2) { +         /* determine which of the four pixels is inside the span bounds */ +         uint mask = 0x0; +         if (x >= xleft0 && x < xright0) +            mask |= MASK_TOP_LEFT; +         if (x >= xleft1 && x < xright1) +            mask |= MASK_BOTTOM_LEFT; +         if (x+1 >= xleft0 && x+1 < xright0) +            mask |= MASK_TOP_RIGHT; +         if (x+1 >= xleft1 && x+1 < xright1) +            mask |= MASK_BOTTOM_RIGHT; +         EMIT_QUAD( setup, x, setup->span.y, mask ); +      } +      break; + +   case 0x1: +      /* only even line written (quad top row) */ +      minleft = block(xleft0); +      maxright = block(xright0); +      for (x = minleft; x <= maxright; x += 2) { +         uint mask = 0x0; +         if (x >= xleft0 && x < xright0) +            mask |= MASK_TOP_LEFT; +         if (x+1 >= xleft0 && x+1 < xright0) +            mask |= MASK_TOP_RIGHT; +         EMIT_QUAD( setup, x, setup->span.y, mask ); +      } +      break; + +   case 0x2: +      /* only odd line written (quad bottom row) */ +      minleft = block(xleft1); +      maxright = block(xright1); +      for (x = minleft; x <= maxright; x += 2) { +         uint mask = 0x0; +         if (x >= xleft1 && x < xright1) +            mask |= 
MASK_BOTTOM_LEFT; +         if (x+1 >= xleft1 && x+1 < xright1) +            mask |= MASK_BOTTOM_RIGHT; +         EMIT_QUAD( setup, x, setup->span.y, mask ); +      } +      break; + +   default: +      return; +   } + +   setup->span.y = 0; +   setup->span.y_flags = 0; +   setup->span.right[0] = 0; +   setup->span.right[1] = 0; +} + + +#if DEBUG_VERTS +static void print_vertex(const struct setup_context *setup, +                         const float (*v)[4]) +{ +   int i; +   debug_printf("   Vertex: (%p)\n", v); +   for (i = 0; i < setup->quad.nr_attrs; i++) { +      debug_printf("     %d: %f %f %f %f\n",  i, +              v[i][0], v[i][1], v[i][2], v[i][3]); +   } +} +#endif + +/** + * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise + */ +static boolean setup_sort_vertices( struct setup_context *setup, +                                    float det, +                                    const float (*v0)[4], +                                    const float (*v1)[4], +                                    const float (*v2)[4] ) +{ +   setup->vprovoke = v2; + +   /* determine bottom to top order of vertices */ +   { +      float y0 = v0[0][1]; +      float y1 = v1[0][1]; +      float y2 = v2[0][1]; +      if (y0 <= y1) { +	 if (y1 <= y2) { +	    /* y0<=y1<=y2 */ +	    setup->vmin = v0; +	    setup->vmid = v1; +	    setup->vmax = v2; +	 } +	 else if (y2 <= y0) { +	    /* y2<=y0<=y1 */ +	    setup->vmin = v2; +	    setup->vmid = v0; +	    setup->vmax = v1; +	 } +	 else { +	    /* y0<=y2<=y1 */ +	    setup->vmin = v0; +	    setup->vmid = v2; +	    setup->vmax = v1; +	 } +      } +      else { +	 if (y0 <= y2) { +	    /* y1<=y0<=y2 */ +	    setup->vmin = v1; +	    setup->vmid = v0; +	    setup->vmax = v2; +	 } +	 else if (y2 <= y1) { +	    /* y2<=y1<=y0 */ +	    setup->vmin = v2; +	    setup->vmid = v1; +	    setup->vmax = v0; +	 } +	 else { +	    /* y1<=y2<=y0 */ +	    setup->vmin = v1; +	    setup->vmid = v2; +	    setup->vmax = v0; +	 } +      } +   } + +   setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; +   setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; +   setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; +   setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; +   setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; +   setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; + +   /* +    * Compute triangle's area.  Use 1/area to compute partial +    * derivatives of attributes later. +    * +    * The area will be the same as prim->det, but the sign may be +    * different depending on how the vertices get sorted above. +    * +    * To determine whether the primitive is front or back facing we +    * use the prim->det value because its sign is correct. +    */ +   { +      const float area = (setup->emaj.dx * setup->ebot.dy - +			    setup->ebot.dx * setup->emaj.dy); + +      setup->oneoverarea = 1.0f / area; + +      /* +      debug_printf("%s one-over-area %f  area %f  det %f\n", +                   __FUNCTION__, setup->oneoverarea, area, det ); +      */ +      if (is_inf_or_nan(setup->oneoverarea)) +         return FALSE; +   } + +   /* We need to know if this is a front or back-facing triangle for: +    *  - the GLSL gl_FrontFacing fragment attribute (bool) +    *  - two-sided stencil test +    */ +   setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + +   return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
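 * (That is, the interpolation plane is horizontal: dadx = dady = 0 and
 *  a0 is simply the provoking vertex's attribute, so every fragment in
 *  the primitive samples the same constant.)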
+ * The value comes from vertex[slot][i].
+ * The result will be put into setup->coef[slot].a0[i].
+ * \param slot  which attribute slot
+ * \param i  which component of the slot (0..3)
+ */
+static void const_coeff( struct setup_context *setup,
+                         struct tgsi_interp_coef *coef,
+                         uint vertSlot, uint i)
+{
+   assert(i <= 3);
+
+   coef->dadx[i] = 0;
+   coef->dady[i] = 0;
+
+   /* need provoking vertex info!
+    */
+   coef->a0[i] = setup->vprovoke[vertSlot][i];
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a triangle.
+ */
+static void tri_linear_coeff( struct setup_context *setup,
+                              struct tgsi_interp_coef *coef,
+                              uint vertSlot, uint i)
+{
+   float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
+   float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
+   float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
+   float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+   float dadx = a * setup->oneoverarea;
+   float dady = b * setup->oneoverarea;
+
+   assert(i <= 3);
+
+   coef->dadx[i] = dadx;
+   coef->dady[i] = dady;
+
+   /* calculate a0 as the value which would be sampled for the
+    * fragment at (0,0), taking into account that we want to sample at
+    * pixel centers, in other words (0.5, 0.5).
+    *
+    * this is neat but unfortunately not a good way to do things for
+    * triangles with very large values of dadx or dady as it will
+    * result in the subtraction and re-addition from a0 of a very
+    * large number, which means we'll end up losing a lot of the
+    * fractional bits and precision from a0.  the way to fix this is
+    * to define a0 as the sample at a pixel center somewhere near vmin
+    * instead - i'll switch to this later.
+    */
+   coef->a0[i] = (setup->vmin[vertSlot][i] -
+                  (dadx * (setup->vmin[0][0] - 0.5f) +
+                   dady * (setup->vmin[0][1] - 0.5f)));
+
+   /*
+   debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
+		slot, "xyzw"[i],
+		setup->coef[slot].a0[i],
+		setup->coef[slot].dadx[i],
+		setup->coef[slot].dady[i]);
+   */
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
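 * (Illustratively: an attribute A is premultiplied by 1/w at each vertex,
 *  the product is interpolated linearly in screen space as
 *  a0 + dadx*x + dady*y, and dividing that by the correspondingly
 *  interpolated w term per fragment undoes the premultiplication,
 *  giving the perspective-correct value.)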
+ */ +static void tri_persp_coeff( struct setup_context *setup, +                             struct tgsi_interp_coef *coef, +                             uint vertSlot, uint i) +{ +   /* premultiply by 1/w  (v[0][3] is always W): +    */ +   float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; +   float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; +   float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; +   float botda = mida - mina; +   float majda = maxa - mina; +   float a = setup->ebot.dy * majda - botda * setup->emaj.dy; +   float b = setup->emaj.dx * botda - majda * setup->ebot.dx; +   float dadx = a * setup->oneoverarea; +   float dady = b * setup->oneoverarea; + +   /* +   debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, +          	setup->vmin[vertSlot][i], +          	setup->vmid[vertSlot][i], +       		setup->vmax[vertSlot][i] +          ); +   */ +   assert(i <= 3); + +   coef->dadx[i] = dadx; +   coef->dady[i] = dady; +   coef->a0[i] = (mina - +                  (dadx * (setup->vmin[0][0] - 0.5f) + +                   dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_context *setup, uint slot) +{ +   /*X*/ +   setup->coef[slot].a0[0] = 0; +   setup->coef[slot].dadx[0] = 1.0; +   setup->coef[slot].dady[0] = 0.0; +   /*Y*/ +   if (setup->softpipe->rasterizer->origin_lower_left) { +      /* y=0=bottom */ +      const int winHeight = setup->softpipe->framebuffer.height; +      setup->coef[slot].a0[1] = (float) (winHeight - 1); +      setup->coef[slot].dady[1] = -1.0; +   } +   else { +      /* y=0=top */ +      setup->coef[slot].a0[1] = 0.0; +      setup->coef[slot].dady[1] = 1.0; +   } +   setup->coef[slot].dadx[1] = 0.0; +   /*Z*/ +   setup->coef[slot].a0[2] = setup->posCoef.a0[2]; +   setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; +   setup->coef[slot].dady[2] = setup->posCoef.dady[2]; +   /*W*/ +   setup->coef[slot].a0[3] = setup->posCoef.a0[3]; +   setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; +   setup->coef[slot].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. 
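 * (Z and W always use plain linear interpolation; every other input is
 *  dispatched on its interp_mode: constant, linear, perspective, or the
 *  special gl_FragCoord path for INTERP_POS.)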
+ */ +static void setup_tri_coefficients( struct setup_context *setup ) +{ +   struct softpipe_context *softpipe = setup->softpipe; +   const struct sp_fragment_shader *spfs = softpipe->fs; +   const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); +   uint fragSlot; + +   /* z and w are done by linear interpolation: +    */ +   tri_linear_coeff(setup, &setup->posCoef, 0, 2); +   tri_linear_coeff(setup, &setup->posCoef, 0, 3); + +   /* setup interpolation for all the remaining attributes: +    */ +   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { +      const uint vertSlot = vinfo->attrib[fragSlot].src_index; +      uint j; + +      switch (vinfo->attrib[fragSlot].interp_mode) { +      case INTERP_CONSTANT: +         for (j = 0; j < NUM_CHANNELS; j++) +            const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_LINEAR: +         for (j = 0; j < NUM_CHANNELS; j++) +            tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_PERSPECTIVE: +         for (j = 0; j < NUM_CHANNELS; j++) +            tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_POS: +         setup_fragcoord_coeff(setup, fragSlot); +         break; +      default: +         assert(0); +      } + +      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { +         /* FOG.y = front/back facing  XXX fix this */ +         setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; +         setup->coef[fragSlot].dadx[1] = 0.0; +         setup->coef[fragSlot].dady[1] = 0.0; +      } +   } +} + + + +static void setup_tri_edges( struct setup_context *setup ) +{ +   float vmin_x = setup->vmin[0][0] + 0.5f; +   float vmid_x = setup->vmid[0][0] + 0.5f; + +   float vmin_y = setup->vmin[0][1] - 0.5f; +   float vmid_y = setup->vmid[0][1] - 0.5f; +   float vmax_y = setup->vmax[0][1] - 0.5f; + +   setup->emaj.sy = ceilf(vmin_y); +   setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); +   setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; +   setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + +   setup->etop.sy = ceilf(vmid_y); +   setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); +   setup->etop.dxdy = setup->etop.dx / setup->etop.dy; +   setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + +   setup->ebot.sy = ceilf(vmin_y); +   setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); +   setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; +   setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. 
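 * (Per scanline the left/right bounds are computed directly as
 *  sx + y * dxdy rather than by accumulating floating-point adds, and
 *  spans are gathered two rows at a time so flush_spans() can emit them
 *  as 2x2 quads.)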
+ */ +static void subtriangle( struct setup_context *setup, +			 struct edge *eleft, +			 struct edge *eright, +			 unsigned lines ) +{ +   const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; +   const int minx = (int) cliprect->minx; +   const int maxx = (int) cliprect->maxx; +   const int miny = (int) cliprect->miny; +   const int maxy = (int) cliprect->maxy; +   int y, start_y, finish_y; +   int sy = (int)eleft->sy; + +   assert((int)eleft->sy == (int) eright->sy); + +   /* clip top/bottom */ +   start_y = sy; +   finish_y = sy + lines; + +   if (start_y < miny) +      start_y = miny; + +   if (finish_y > maxy) +      finish_y = maxy; + +   start_y -= sy; +   finish_y -= sy; + +   /* +   debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); +   */ + +   for (y = start_y; y < finish_y; y++) { + +      /* avoid accumulating adds as floats don't have the precision to +       * accurately iterate large triangle edges that way.  luckily we +       * can just multiply these days. +       * +       * this is all drowned out by the attribute interpolation anyway. +       */ +      int left = (int)(eleft->sx + y * eleft->dxdy); +      int right = (int)(eright->sx + y * eright->dxdy); + +      /* clip left/right */ +      if (left < minx) +         left = minx; +      if (right > maxx) +         right = maxx; + +      if (left < right) { +         int _y = sy + y; +         if (block(_y) != setup->span.y) { +            flush_spans(setup); +            setup->span.y = block(_y); +         } + +         setup->span.left[_y&1] = left; +         setup->span.right[_y&1] = right; +         setup->span.y_flags |= 1<<(_y&1); +      } +   } + + +   /* save the values so that emaj can be restarted: +    */ +   eleft->sx += lines * eleft->dxdy; +   eright->sx += lines * eright->dxdy; +   eleft->sy += lines; +   eright->sy += lines; +} + + +/** + * Recalculate prim's determinant.  This is needed as we don't have + * get this information through the vbuf_render interface & we must + * calculate it here. + */ +static float +calc_det( const float (*v0)[4], +          const float (*v1)[4], +          const float (*v2)[4] ) +{ +   /* edge vectors e = v0 - v2, f = v1 - v2 */ +   const float ex = v0[0][0] - v2[0][0]; +   const float ey = v0[0][1] - v2[0][1]; +   const float fx = v1[0][0] - v2[0][0]; +   const float fy = v1[0][1] - v2[0][1]; + +   /* det = cross(e,f).z */ +   return ex * fy - ey * fx; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. 
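 * (The triangle is walked as two halves: vmin..vmid against the long
 *  vmin..vmax edge, then vmid..vmax against the same long edge.  The
 *  sign of oneoverarea decides whether that long edge, emaj, is the
 *  left or the right side of both halves.)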
+ */ +void setup_tri( struct setup_context *setup, +                const float (*v0)[4], +                const float (*v1)[4], +                const float (*v2)[4] ) +{ +   float det; + +#if DEBUG_VERTS +   debug_printf("Setup triangle:\n"); +   print_vertex(setup, v0); +   print_vertex(setup, v1); +   print_vertex(setup, v2); +#endif + +   if (setup->softpipe->no_rast) +      return; +    +   det = calc_det(v0, v1, v2); +   /* +   debug_printf("%s\n", __FUNCTION__ ); +   */ + +#if DEBUG_FRAGS +   setup->numFragsEmitted = 0; +   setup->numFragsWritten = 0; +#endif + +   if (cull_tri( setup, det )) +      return; + +   if (!setup_sort_vertices( setup, det, v0, v1, v2 )) +      return; +   setup_tri_coefficients( setup ); +   setup_tri_edges( setup ); + +   setup->quad.input.prim = PRIM_TRI; + +   setup->span.y = 0; +   setup->span.y_flags = 0; +   setup->span.right[0] = 0; +   setup->span.right[1] = 0; +   /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */ + +   /*   init_constant_attribs( setup ); */ + +   if (setup->oneoverarea < 0.0) { +      /* emaj on left: +       */ +      subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); +      subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); +   } +   else { +      /* emaj on right: +       */ +      subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); +      subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); +   } + +   flush_spans( setup ); + +   WAIT_FOR_COMPLETION(setup); + +#if DEBUG_FRAGS +   printf("Tri: %u frags emitted, %u written\n", +          setup->numFragsEmitted, +          setup->numFragsWritten); +#endif +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. + */ +static void +line_linear_coeff(struct setup_context *setup, +                  struct tgsi_interp_coef *coef, +                  uint vertSlot, uint i) +{ +   const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; +   const float dadx = da * setup->emaj.dx * setup->oneoverarea; +   const float dady = da * setup->emaj.dy * setup->oneoverarea; +   coef->dadx[i] = dadx; +   coef->dady[i] = dady; +   coef->a0[i] = (setup->vmin[vertSlot][i] - +                  (dadx * (setup->vmin[0][0] - 0.5f) + +                   dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_context *setup, +                  struct tgsi_interp_coef *coef, +                  uint vertSlot, uint i) +{ +   /* XXX double-check/verify this arithmetic */ +   const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; +   const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; +   const float da = a1 - a0; +   const float dadx = da * setup->emaj.dx * setup->oneoverarea; +   const float dady = da * setup->emaj.dy * setup->oneoverarea; +   coef->dadx[i] = dadx; +   coef->dady[i] = dady; +   coef->a0[i] = (setup->vmin[vertSlot][i] - +                  (dadx * (setup->vmin[0][0] - 0.5f) + +                   dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. 
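 * (The "area" computed here is really the squared screen-space length of
 *  the line, so dadx/dady amount to projecting the attribute delta onto
 *  the line's direction: the value varies linearly along the line and is
 *  constant across it.)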
+ */ +static INLINE boolean +setup_line_coefficients(struct setup_context *setup, +                        const float (*v0)[4], +                        const float (*v1)[4]) +{ +   struct softpipe_context *softpipe = setup->softpipe; +   const struct sp_fragment_shader *spfs = softpipe->fs; +   const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); +   uint fragSlot; +   float area; + +   /* use setup->vmin, vmax to point to vertices */ +   setup->vprovoke = v1; +   setup->vmin = v0; +   setup->vmax = v1; + +   setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; +   setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + +   /* NOTE: this is not really area but something proportional to it */ +   area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; +   if (area == 0.0f || is_inf_or_nan(area)) +      return FALSE; +   setup->oneoverarea = 1.0f / area; + +   /* z and w are done by linear interpolation: +    */ +   line_linear_coeff(setup, &setup->posCoef, 0, 2); +   line_linear_coeff(setup, &setup->posCoef, 0, 3); + +   /* setup interpolation for all the remaining attributes: +    */ +   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { +      const uint vertSlot = vinfo->attrib[fragSlot].src_index; +      uint j; + +      switch (vinfo->attrib[fragSlot].interp_mode) { +      case INTERP_CONSTANT: +         for (j = 0; j < NUM_CHANNELS; j++) +            const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_LINEAR: +         for (j = 0; j < NUM_CHANNELS; j++) +            line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_PERSPECTIVE: +         for (j = 0; j < NUM_CHANNELS; j++) +            line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_POS: +         setup_fragcoord_coeff(setup, fragSlot); +         break; +      default: +         assert(0); +      } + +      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { +         /* FOG.y = front/back facing  XXX fix this */ +         setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; +         setup->coef[fragSlot].dadx[1] = 0.0; +         setup->coef[fragSlot].dady[1] = 0.0; +      } +   } +   return TRUE; +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_context *setup, int x, int y) +{ +   const int iy = y & 1; +   const int ix = x & 1; +   const int quadX = x - ix; +   const int quadY = y - iy; +   const int mask = (1 << ix) << (2 * iy); + +   if (quadX != setup->quad.input.x0 || +       quadY != setup->quad.input.y0) +   { +      /* flush prev quad, start new quad */ + +      if (setup->quad.input.x0 != -1) +         CLIP_EMIT_QUAD(setup); + +      setup->quad.input.x0 = quadX; +      setup->quad.input.y0 = quadY; +      setup->quad.inout.mask = 0x0; +   } + +   setup->quad.inout.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc.  We rely on the 'draw' module + * to handle stippling and wide lines. 
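 * (The pixel walk below is a standard integer Bresenham: for an X-major
 *  line the error term starts at 2*dy - dx, is advanced by 2*dy on a pure
 *  X step, and by 2*(dy - dx) whenever Y also steps; the Y-major case is
 *  symmetric.)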
+ */ +void +setup_line(struct setup_context *setup, +           const float (*v0)[4], +           const float (*v1)[4]) +{ +   int x0 = (int) v0[0][0]; +   int x1 = (int) v1[0][0]; +   int y0 = (int) v0[0][1]; +   int y1 = (int) v1[0][1]; +   int dx = x1 - x0; +   int dy = y1 - y0; +   int xstep, ystep; + +#if DEBUG_VERTS +   debug_printf("Setup line:\n"); +   print_vertex(setup, v0); +   print_vertex(setup, v1); +#endif + +   if (setup->softpipe->no_rast) +      return; + +   if (dx == 0 && dy == 0) +      return; + +   if (!setup_line_coefficients(setup, v0, v1)) +      return; + +   assert(v0[0][0] < 1.0e9); +   assert(v0[0][1] < 1.0e9); +   assert(v1[0][0] < 1.0e9); +   assert(v1[0][1] < 1.0e9); + +   if (dx < 0) { +      dx = -dx;   /* make positive */ +      xstep = -1; +   } +   else { +      xstep = 1; +   } + +   if (dy < 0) { +      dy = -dy;   /* make positive */ +      ystep = -1; +   } +   else { +      ystep = 1; +   } + +   assert(dx >= 0); +   assert(dy >= 0); + +   setup->quad.input.x0 = setup->quad.input.y0 = -1; +   setup->quad.inout.mask = 0x0; +   setup->quad.input.prim = PRIM_LINE; +   /* XXX temporary: set coverage to 1.0 so the line appears +    * if AA mode happens to be enabled. +    */ +   setup->quad.input.coverage[0] = +   setup->quad.input.coverage[1] = +   setup->quad.input.coverage[2] = +   setup->quad.input.coverage[3] = 1.0; + +   if (dx > dy) { +      /*** X-major line ***/ +      int i; +      const int errorInc = dy + dy; +      int error = errorInc - dx; +      const int errorDec = error - dx; + +      for (i = 0; i < dx; i++) { +         plot(setup, x0, y0); + +         x0 += xstep; +         if (error < 0) { +            error += errorInc; +         } +         else { +            error += errorDec; +            y0 += ystep; +         } +      } +   } +   else { +      /*** Y-major line ***/ +      int i; +      const int errorInc = dx + dx; +      int error = errorInc - dy; +      const int errorDec = error - dy; + +      for (i = 0; i < dy; i++) { +         plot(setup, x0, y0); + +         y0 += ystep; +         if (error < 0) { +            error += errorInc; +         } +         else { +            error += errorDec; +            x0 += xstep; +         } +      } +   } + +   /* draw final quad */ +   if (setup->quad.inout.mask) { +      CLIP_EMIT_QUAD(setup); +   } + +   WAIT_FOR_COMPLETION(setup); +} + + +static void +point_persp_coeff(struct setup_context *setup, +                  const float (*vert)[4], +                  struct tgsi_interp_coef *coef, +                  uint vertSlot, uint i) +{ +   assert(i <= 3); +   coef->dadx[i] = 0.0F; +   coef->dady[i] = 0.0F; +   coef->a0[i] = vert[vertSlot][i] * vert[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +void +setup_point( struct setup_context *setup, +             const float (*v0)[4] ) +{ +   struct softpipe_context *softpipe = setup->softpipe; +   const struct sp_fragment_shader *spfs = softpipe->fs; +   const int sizeAttr = setup->softpipe->psize_slot; +   const float size +      = sizeAttr > 0 ? 
v0[sizeAttr][0] +      : setup->softpipe->rasterizer->point_size; +   const float halfSize = 0.5F * size; +   const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; +   const float x = v0[0][0];  /* Note: data[0] is always position */ +   const float y = v0[0][1]; +   const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); +   uint fragSlot; + +#if DEBUG_VERTS +   debug_printf("Setup point:\n"); +   print_vertex(setup, v0); +#endif + +   if (softpipe->no_rast) +      return; + +   /* For points, all interpolants are constant-valued. +    * However, for point sprites, we'll need to setup texcoords appropriately. +    * XXX: which coefficients are the texcoords??? +    * We may do point sprites as textured quads... +    * +    * KW: We don't know which coefficients are texcoords - ultimately +    * the choice of what interpolation mode to use for each attribute +    * should be determined by the fragment program, using +    * per-attribute declaration statements that include interpolation +    * mode as a parameter.  So either the fragment program will have +    * to be adjusted for pointsprite vs normal point behaviour, or +    * otherwise a special interpolation mode will have to be defined +    * which matches the required behaviour for point sprites.  But - +    * the latter is not a feature of normal hardware, and as such +    * probably should be ruled out on that basis. +    */ +   setup->vprovoke = v0; + +   /* setup Z, W */ +   const_coeff(setup, &setup->posCoef, 0, 2); +   const_coeff(setup, &setup->posCoef, 0, 3); + +   for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { +      const uint vertSlot = vinfo->attrib[fragSlot].src_index; +      uint j; + +      switch (vinfo->attrib[fragSlot].interp_mode) { +      case INTERP_CONSTANT: +         /* fall-through */ +      case INTERP_LINEAR: +         for (j = 0; j < NUM_CHANNELS; j++) +            const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_PERSPECTIVE: +         for (j = 0; j < NUM_CHANNELS; j++) +            point_persp_coeff(setup, setup->vprovoke, +                              &setup->coef[fragSlot], vertSlot, j); +         break; +      case INTERP_POS: +         setup_fragcoord_coeff(setup, fragSlot); +         break; +      default: +         assert(0); +      } + +      if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { +         /* FOG.y = front/back facing  XXX fix this */ +         setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; +         setup->coef[fragSlot].dadx[1] = 0.0; +         setup->coef[fragSlot].dady[1] = 0.0; +      } +   } + +   setup->quad.input.prim = PRIM_POINT; + +   if (halfSize <= 0.5 && !round) { +      /* special case for 1-pixel points */ +      const int ix = ((int) x) & 1; +      const int iy = ((int) y) & 1; +      setup->quad.input.x0 = (int) x - ix; +      setup->quad.input.y0 = (int) y - iy; +      setup->quad.inout.mask = (1 << ix) << (2 * iy); +      CLIP_EMIT_QUAD(setup); +   } +   else { +      if (round) { +         /* rounded points */ +         const int ixmin = block((int) (x - halfSize)); +         const int ixmax = block((int) (x + halfSize)); +         const int iymin = block((int) (y - halfSize)); +         const int iymax = block((int) (y + halfSize)); +         const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */ +         const float rmax = halfSize + 0.7071F; +         const float rmin2 = MAX2(0.0F, rmin * rmin); +         const float 
rmax2 = rmax * rmax; +         const float cscale = 1.0F / (rmax2 - rmin2); +         int ix, iy; + +         for (iy = iymin; iy <= iymax; iy += 2) { +            for (ix = ixmin; ix <= ixmax; ix += 2) { +               float dx, dy, dist2, cover; + +               setup->quad.inout.mask = 0x0; + +               dx = (ix + 0.5f) - x; +               dy = (iy + 0.5f) - y; +               dist2 = dx * dx + dy * dy; +               if (dist2 <= rmax2) { +                  cover = 1.0F - (dist2 - rmin2) * cscale; +                  setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); +                  setup->quad.inout.mask |= MASK_TOP_LEFT; +               } + +               dx = (ix + 1.5f) - x; +               dy = (iy + 0.5f) - y; +               dist2 = dx * dx + dy * dy; +               if (dist2 <= rmax2) { +                  cover = 1.0F - (dist2 - rmin2) * cscale; +                  setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); +                  setup->quad.inout.mask |= MASK_TOP_RIGHT; +               } + +               dx = (ix + 0.5f) - x; +               dy = (iy + 1.5f) - y; +               dist2 = dx * dx + dy * dy; +               if (dist2 <= rmax2) { +                  cover = 1.0F - (dist2 - rmin2) * cscale; +                  setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); +                  setup->quad.inout.mask |= MASK_BOTTOM_LEFT; +               } + +               dx = (ix + 1.5f) - x; +               dy = (iy + 1.5f) - y; +               dist2 = dx * dx + dy * dy; +               if (dist2 <= rmax2) { +                  cover = 1.0F - (dist2 - rmin2) * cscale; +                  setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); +                  setup->quad.inout.mask |= MASK_BOTTOM_RIGHT; +               } + +               if (setup->quad.inout.mask) { +                  setup->quad.input.x0 = ix; +                  setup->quad.input.y0 = iy; +                  CLIP_EMIT_QUAD(setup); +               } +            } +         } +      } +      else { +         /* square points */ +         const int xmin = (int) (x + 0.75 - halfSize); +         const int ymin = (int) (y + 0.25 - halfSize); +         const int xmax = xmin + (int) size; +         const int ymax = ymin + (int) size; +         /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ +         const int ixmin = block(xmin); +         const int ixmax = block(xmax - 1); +         const int iymin = block(ymin); +         const int iymax = block(ymax - 1); +         int ix, iy; + +         /* +         debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); +         */ +         for (iy = iymin; iy <= iymax; iy += 2) { +            uint rowMask = 0xf; +            if (iy < ymin) { +               /* above the top edge */ +               rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); +            } +            if (iy + 1 >= ymax) { +               /* below the bottom edge */ +               rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +            } + +            for (ix = ixmin; ix <= ixmax; ix += 2) { +               uint mask = rowMask; + +               if (ix < xmin) { +                  /* fragment is past left edge of point, turn off left bits */ +                  mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); +               } +               if (ix + 1 >= xmax) { +                  /* past the right edge */ +                  mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); +               } + +               
setup->quad.inout.mask = mask; +               setup->quad.input.x0 = ix; +               setup->quad.input.y0 = iy; +               CLIP_EMIT_QUAD(setup); +            } +         } +      } +   } + +   WAIT_FOR_COMPLETION(setup); +} + +void setup_prepare( struct setup_context *setup ) +{ +   struct softpipe_context *sp = setup->softpipe; +   unsigned i; + +   if (sp->dirty) { +      softpipe_update_derived(sp); +   } + +   /* Mark surfaces as defined now */ +   for (i = 0; i < sp->framebuffer.nr_cbufs; i++){ +      if (sp->framebuffer.cbufs[i]) { +         sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; +      } +   } +   if (sp->framebuffer.zsbuf) { +      sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; +   } + +   /* Note: nr_attrs is only used for debugging (vertex printing) */ +   setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); + +   for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { +      sp->quad[i].first->begin( sp->quad[i].first ); +   } + +   if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && +       sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && +       sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { +      /* we'll do culling */ +      setup->winding = sp->rasterizer->cull_mode; +   } +   else { +      /* 'draw' will do culling */ +      setup->winding = PIPE_WINDING_NONE; +   } +} + + + +void setup_destroy_context( struct setup_context *setup ) +{ +   FREE( setup ); +} + + +/** + * Create a new primitive setup/render stage. + */ +struct setup_context *setup_create_context( struct softpipe_context *softpipe ) +{ +   struct setup_context *setup = CALLOC_STRUCT(setup_context); +#if SP_NUM_QUAD_THREADS > 1 +   uint i; +#endif + +   setup->softpipe = softpipe; + +   setup->quad.coef = setup->coef; +   setup->quad.posCoef = &setup->posCoef; + +#if SP_NUM_QUAD_THREADS > 1 +   setup->que.first = 0; +   setup->que.last = 0; +   pipe_mutex_init( setup->que.que_mutex ); +   pipe_condvar_init( setup->que.que_notfull_condvar ); +   pipe_condvar_init( setup->que.que_notempty_condvar ); +   setup->que.jobs_added = 0; +   setup->que.jobs_done = 0; +   pipe_condvar_init( setup->que.que_done_condvar ); +   for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { +      setup->threads[i].setup = setup; +      setup->threads[i].id = i; +      setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] ); +   } +#endif + +   return setup; +} + diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h new file mode 100644 index 0000000000..d54f334428 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef SP_SETUP_H +#define SP_SETUP_H + +struct setup_context; +struct softpipe_context; + +void  +setup_tri( struct setup_context *setup, +	   const float (*v0)[4], +	   const float (*v1)[4], +	   const float (*v2)[4] ); + +void +setup_line(struct setup_context *setup, +           const float (*v0)[4], +           const float (*v1)[4]); + +void +setup_point( struct setup_context *setup, +             const float (*v0)[4] ); + + +struct setup_context *setup_create_context( struct softpipe_context *softpipe ); +void setup_prepare( struct setup_context *setup ); +void setup_destroy_context( struct setup_context *setup ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h new file mode 100644 index 0000000000..3eff41ffa5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -0,0 +1,206 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_STATE_H +#define SP_STATE_H + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + + +#define SP_NEW_VIEWPORT      0x1 +#define SP_NEW_RASTERIZER    0x2 +#define SP_NEW_FS            0x4 +#define SP_NEW_BLEND         0x8 +#define SP_NEW_CLIP          0x10 +#define SP_NEW_SCISSOR       0x20 +#define SP_NEW_STIPPLE       0x40 +#define SP_NEW_FRAMEBUFFER   0x80 +#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define SP_NEW_CONSTANTS     0x200 +#define SP_NEW_SAMPLER       0x400 +#define SP_NEW_TEXTURE       0x800 +#define SP_NEW_VERTEX        0x1000 +#define SP_NEW_VS            0x2000 +#define SP_NEW_QUERY         0x4000 + + +struct tgsi_sampler; +struct tgsi_exec_machine; +struct vertex_info; + + +/** + * Subclass of pipe_shader_state (though it doesn't really need to be). + * + * This is starting to look an awful lot like a quad pipeline stage... + */ +struct sp_fragment_shader { +   struct pipe_shader_state shader; + +   struct tgsi_shader_info info; + +   void (*prepare)( const struct sp_fragment_shader *shader, +		    struct tgsi_exec_machine *machine, +		    struct tgsi_sampler **samplers); + +   /* Run the shader - this interface will get cleaned up in the +    * future: +    */ +   unsigned (*run)( const struct sp_fragment_shader *shader, +		    struct tgsi_exec_machine *machine, +		    struct quad_header *quad ); + + +   void (*delete)( struct sp_fragment_shader * ); +}; + + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader { +   struct pipe_shader_state shader;  /* Note: this field not actually used */ +   struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, +                            const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, +                               void *); +void softpipe_delete_blend_state(struct pipe_context *, +                                 void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, +                              const struct pipe_sampler_state *); +void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, +                                    const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, +                                 const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, +			     const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, +                               const struct pipe_blend_color *blend_color ); + +void softpipe_set_clip_state( struct pipe_context *, +			     const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, +                                  uint shader, uint index, +                                  const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, +          
                     const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, +                               const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, +				  const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, +                                 const struct pipe_scissor_state * ); + +void softpipe_set_sampler_textures( struct pipe_context *, +                                    unsigned num, +                                    struct pipe_texture ** ); + +void softpipe_set_viewport_state( struct pipe_context *, +                                  const struct pipe_viewport_state * ); + +void softpipe_set_vertex_elements(struct pipe_context *, +                                  unsigned count, +                                  const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffers(struct pipe_context *, +                                 unsigned count, +                                 const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, +			     unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, +			       struct pipe_buffer *indexBuffer, +			       unsigned indexSize, +			       unsigned mode, unsigned start, unsigned count); +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, +                             struct pipe_buffer *indexBuffer, +                             unsigned indexSize, +                             unsigned min_index, +                             unsigned max_index, +                             unsigned mode, unsigned start, unsigned count); + +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); + + +void +softpipe_map_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c new file mode 100644 index 0000000000..384fe559af --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -0,0 +1,98 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_state.h" + + +void * +softpipe_create_blend_state(struct pipe_context *pipe, +                            const struct pipe_blend_state *blend) +{ +   return mem_dup(blend, sizeof(*blend)); +} + +void softpipe_bind_blend_state( struct pipe_context *pipe, +                                void *blend ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   softpipe->blend = (const struct pipe_blend_state *)blend; + +   softpipe->dirty |= SP_NEW_BLEND; +} + +void softpipe_delete_blend_state(struct pipe_context *pipe, +                                 void *blend) +{ +   FREE( blend ); +} + + +void softpipe_set_blend_color( struct pipe_context *pipe, +			     const struct pipe_blend_color *blend_color ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   softpipe->blend_color = *blend_color; + +   softpipe->dirty |= SP_NEW_BLEND; +} + + +/** XXX move someday?  Or consolidate all these simple state setters + * into one file. + */ + + +void * +softpipe_create_depth_stencil_state(struct pipe_context *pipe, +				    const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ +   return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + +void +softpipe_bind_depth_stencil_state(struct pipe_context *pipe, +                                  void *depth_stencil) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + +   softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; +} + +void +softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ +   FREE( depth ); +} diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c new file mode 100644 index 0000000000..4946c776e3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -0,0 +1,79 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ +#include "sp_context.h" +#include "sp_state.h" +#include "draw/draw_context.h" + + +void softpipe_set_clip_state( struct pipe_context *pipe, +			     const struct pipe_clip_state *clip ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   /* pass the clip state to the draw module */ +   draw_set_clip_state(softpipe->draw, clip); +} + + +void softpipe_set_viewport_state( struct pipe_context *pipe, +                                  const struct pipe_viewport_state *viewport ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   /* pass the viewport info to the draw module */ +   draw_set_viewport_state(softpipe->draw, viewport); + +   softpipe->viewport = *viewport; /* struct copy */ +   softpipe->dirty |= SP_NEW_VIEWPORT; +} + + +void softpipe_set_scissor_state( struct pipe_context *pipe, +                                 const struct pipe_scissor_state *scissor ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   draw_flush(softpipe->draw); + +   softpipe->scissor = *scissor; /* struct copy */ +   softpipe->dirty |= SP_NEW_SCISSOR; +} + + +void softpipe_set_polygon_stipple( struct pipe_context *pipe, +                                   const struct pipe_poly_stipple *stipple ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   draw_flush(softpipe->draw); + +   softpipe->poly_stipple = *stipple; /* struct copy */ +   softpipe->dirty |= SP_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c new file mode 100644 index 0000000000..6b6a4c3ff3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -0,0 +1,210 @@ +/************************************************************************** + *  + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
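[Editorial sketch, not part of the original commit] softpipe_set_viewport_state() in sp_state_clip.c above only copies the struct, forwards it to the draw module and flags SP_NEW_VIEWPORT; the actual NDC-to-window mapping lives in the scale/translate pairs supplied by the caller. Below is one common way a state tracker might fill pipe_viewport_state for a w x h window with a [0,1] depth range (assumptions: scale[4]/translate[4] layout, no y flip).

#include "pipe/p_state.h"
#include "sp_state.h"

static void example_viewport(struct pipe_context *pipe, float w, float h)
{
   struct pipe_viewport_state vp;

   vp.scale[0] = 0.5f * w;   vp.translate[0] = 0.5f * w;  /* x: [-1,1] -> [0,w] */
   vp.scale[1] = 0.5f * h;   vp.translate[1] = 0.5f * h;  /* y: [-1,1] -> [0,h] */
   vp.scale[2] = 0.5f;       vp.translate[2] = 0.5f;      /* z: [-1,1] -> [0,1] */
   vp.scale[3] = 1.0f;       vp.translate[3] = 0.0f;      /* w: pass-through */

   softpipe_set_viewport_state(pipe, &vp);  /* struct copy + SP_NEW_VIEWPORT */
}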
+ *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "sp_context.h" +#include "sp_state.h" + + +/** + * Mark the current vertex layout as "invalid". + * We'll validate the vertex layout later, when we start to actually + * render a point or line or tri. + */ +static void +invalidate_vertex_layout(struct softpipe_context *softpipe) +{ +   softpipe->vertex_info.num_attribs =  0; +} + + +/** + * The vertex info describes how to convert the post-transformed vertices + * (simple float[][4]) used by the 'draw' module into vertices for + * rasterization. + * + * This function validates the vertex layout and returns a pointer to a + * vertex_info object. + */ +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe) +{ +   struct vertex_info *vinfo = &softpipe->vertex_info; + +   if (vinfo->num_attribs == 0) { +      /* compute vertex layout now */ +      const struct sp_fragment_shader *spfs = softpipe->fs; +      const enum interp_mode colorInterp +         = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; +      uint i; + +      if (softpipe->vbuf) { +         /* if using the post-transform vertex buffer, tell draw_vbuf to +          * simply emit the whole post-xform vertex as-is: +          */ +         struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; +         const uint num = draw_num_vs_outputs(softpipe->draw); +         uint i; + +         /* No longer any need to try and emit draw vertex_header info. +          */ +         vinfo_vbuf->num_attribs = 0; +         for (i = 0; i < num; i++) { +            draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); +         } +         draw_compute_vertex_size(vinfo_vbuf); +      } + +      /* +       * Loop over fragment shader inputs, searching for the matching output +       * from the vertex shader. 
+       */ +      vinfo->num_attribs = 0; +      for (i = 0; i < spfs->info.num_inputs; i++) { +         int src; +         switch (spfs->info.input_semantic_name[i]) { +         case TGSI_SEMANTIC_POSITION: +            src = draw_find_vs_output(softpipe->draw, +                                      TGSI_SEMANTIC_POSITION, 0); +            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); +            break; + +         case TGSI_SEMANTIC_COLOR: +            src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR,  +                                 spfs->info.input_semantic_index[i]); +            draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); +            break; + +         case TGSI_SEMANTIC_FOG: +            src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); +            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +            break; + +         case TGSI_SEMANTIC_GENERIC: +            /* this includes texcoords and varying vars */ +            src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, +                                      spfs->info.input_semantic_index[i]); +            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); +            break; + +         default: +            assert(0); +         } +      } + +      softpipe->psize_slot = draw_find_vs_output(softpipe->draw, +                                                 TGSI_SEMANTIC_PSIZE, 0); +      if (softpipe->psize_slot > 0) { +         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, +                               softpipe->psize_slot); +      } + +      draw_compute_vertex_size(vinfo); +   } + +   return vinfo; +} + + +/** + * Called from vbuf module. + * + * Note that there's actually two different vertex layouts in softpipe. + * + * The normal one is computed in softpipe_get_vertex_info() above and is + * used by the point/line/tri "setup" code. + * + * The other one (this one) is only used by the vbuf module (which is + * not normally used by default but used in testing).  For the vbuf module, + * we basically want to pass-through the draw module's vertex layout as-is. + * When the softpipe vbuf code begins drawing, the normal vertex layout + * will come into play again. + */ +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe) +{ +   (void) softpipe_get_vertex_info(softpipe); +   return &softpipe->vertex_info_vbuf; +} + + +/** + * Recompute cliprect from scissor bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct softpipe_context *sp) +{ +   uint surfWidth = sp->framebuffer.width; +   uint surfHeight = sp->framebuffer.height; + +   if (sp->rasterizer->scissor) { +      /* clip to scissor rect */ +      sp->cliprect.minx = MAX2(sp->scissor.minx, 0); +      sp->cliprect.miny = MAX2(sp->scissor.miny, 0); +      sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); +      sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); +   } +   else { +      /* clip to surface bounds */ +      sp->cliprect.minx = 0; +      sp->cliprect.miny = 0; +      sp->cliprect.maxx = surfWidth; +      sp->cliprect.maxy = surfHeight; +   } +} + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. 
+ */ +void softpipe_update_derived( struct softpipe_context *softpipe ) +{ +   if (softpipe->dirty & (SP_NEW_RASTERIZER | +                          SP_NEW_FS | +                          SP_NEW_VS)) +      invalidate_vertex_layout( softpipe ); + +   if (softpipe->dirty & (SP_NEW_SCISSOR | +                          SP_NEW_DEPTH_STENCIL_ALPHA | +                          SP_NEW_FRAMEBUFFER)) +      compute_cliprect(softpipe); + +   if (softpipe->dirty & (SP_NEW_BLEND | +                          SP_NEW_DEPTH_STENCIL_ALPHA | +                          SP_NEW_FRAMEBUFFER | +                          SP_NEW_RASTERIZER | +                          SP_NEW_FS |  +			  SP_NEW_QUERY)) +      sp_build_quad_pipeline(softpipe); + +   softpipe->dirty = 0; +} diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c new file mode 100644 index 0000000000..4d01a9dbe1 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -0,0 +1,160 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
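[Editorial sketch, not part of the original commit] softpipe_update_derived() above is the lazy half of a classic dirty-flag scheme: the state setters only OR an SP_NEW_* bit into softpipe->dirty, and the derived data (vertex layout, cliprect, quad pipeline) is rebuilt at most once, just before rasterization, for whichever group of bits is set. A standalone miniature of the pattern; the ex_* names and flag groupings below are invented for illustration and are not part of the driver.

#include <stdio.h>

#define EX_NEW_BLEND        (1 << 0)
#define EX_NEW_SCISSOR      (1 << 1)
#define EX_NEW_FRAMEBUFFER  (1 << 2)

struct ex_context {
   unsigned dirty;
};

/* setters just record what changed */
static void ex_set_scissor(struct ex_context *ctx)
{
   /* a real setter would also copy the new scissor state here */
   ctx->dirty |= EX_NEW_SCISSOR;
}

/* called once before rasterizing, like softpipe_update_derived() */
static void ex_update_derived(struct ex_context *ctx)
{
   if (ctx->dirty & (EX_NEW_SCISSOR | EX_NEW_FRAMEBUFFER))
      printf("recompute cliprect\n");        /* cf. compute_cliprect() */
   if (ctx->dirty & (EX_NEW_BLEND | EX_NEW_FRAMEBUFFER))
      printf("rebuild quad pipeline\n");     /* cf. sp_build_quad_pipeline() */
   ctx->dirty = 0;                           /* everything validated */
}

int main(void)
{
   struct ex_context ctx = { 0 };
   ex_set_scissor(&ctx);
   ex_update_derived(&ctx);   /* prints "recompute cliprect" */
   ex_update_derived(&ctx);   /* no-op: nothing dirty */
   return 0;
}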
+ *  + **************************************************************************/ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" + + +void * +softpipe_create_fs_state(struct pipe_context *pipe, +                         const struct pipe_shader_state *templ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); +   struct sp_fragment_shader *state; + +   /* debug */ +   if (softpipe->dump_fs)  +      tgsi_dump(templ->tokens, 0); + +   /* codegen */ +   state = softpipe_create_fs_llvm( softpipe, templ ); +   if (!state) { +      state = softpipe_create_fs_sse( softpipe, templ ); +      if (!state) { +         state = softpipe_create_fs_exec( softpipe, templ ); +      } +   } + +   assert(state); + +   /* get/save the summary info for this shader */ +   tgsi_scan_shader(templ->tokens, &state->info); + +   return state; +} + + +void +softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   softpipe->fs = (struct sp_fragment_shader *) fs; + +   softpipe->dirty |= SP_NEW_FS; +} + + +void +softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) +{ +   struct sp_fragment_shader *state = fs; + +   assert(fs != softpipe_context(pipe)->fs); +    +   state->delete( state ); +} + + +void * +softpipe_create_vs_state(struct pipe_context *pipe, +                         const struct pipe_shader_state *templ) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); +   struct sp_vertex_shader *state; + +   state = CALLOC_STRUCT(sp_vertex_shader); +   if (state == NULL ) { +      return NULL; +   } + +   state->draw_data = draw_create_vertex_shader(softpipe->draw, templ); +   if (state->draw_data == NULL) { +      FREE( state ); +      return NULL; +   } + +   return state; +} + + +void +softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   softpipe->vs = (const struct sp_vertex_shader *)vs; + +   draw_bind_vertex_shader(softpipe->draw, +                           (softpipe->vs ? softpipe->vs->draw_data : NULL)); + +   softpipe->dirty |= SP_NEW_VS; +} + + +void +softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   struct sp_vertex_shader *state = +      (struct sp_vertex_shader *)vs; + +   draw_delete_vertex_shader(softpipe->draw, state->draw_data); +   FREE( state ); +} + + + +void +softpipe_set_constant_buffer(struct pipe_context *pipe, +                             uint shader, uint index, +                             const struct pipe_constant_buffer *buf) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); +   struct pipe_screen *screen = pipe->screen; + +   assert(shader < PIPE_SHADER_TYPES); +   assert(index == 0); + +   /* note: reference counting */ +   pipe_buffer_reference(screen, +			 &softpipe->constants[shader].buffer, +			 buf ? 
buf->buffer : NULL); + +   softpipe->dirty |= SP_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c new file mode 100644 index 0000000000..87b7219683 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -0,0 +1,62 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_state.h" +#include "draw/draw_context.h" + + + +void * +softpipe_create_rasterizer_state(struct pipe_context *pipe, +                                 const struct pipe_rasterizer_state *rast) +{ +   return mem_dup(rast, sizeof(*rast)); +} + +void softpipe_bind_rasterizer_state(struct pipe_context *pipe, +                                    void *setup) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   /* pass-through to draw module */ +   draw_set_rasterizer_state(softpipe->draw, setup); + +   softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + +   softpipe->dirty |= SP_NEW_RASTERIZER; +} + +void softpipe_delete_rasterizer_state(struct pipe_context *pipe, +                                      void *rasterizer) +{ +   FREE( rasterizer ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c new file mode 100644 index 0000000000..99a28c0d7e --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -0,0 +1,118 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors: + *  Brian Paul + */ + +#include "util/u_memory.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "sp_context.h" +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "draw/draw_context.h" + + + +void * +softpipe_create_sampler_state(struct pipe_context *pipe, +                              const struct pipe_sampler_state *sampler) +{ +   return mem_dup(sampler, sizeof(*sampler)); +} + + +void +softpipe_bind_sampler_states(struct pipe_context *pipe, +                             unsigned num, void **sampler) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); +   unsigned i; + +   assert(num <= PIPE_MAX_SAMPLERS); + +   /* Check for no-op */ +   if (num == softpipe->num_samplers && +       !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) +      return; + +   draw_flush(softpipe->draw); + +   for (i = 0; i < num; ++i) +      softpipe->sampler[i] = sampler[i]; +   for (i = num; i < PIPE_MAX_SAMPLERS; ++i) +      softpipe->sampler[i] = NULL; + +   softpipe->num_samplers = num; + +   softpipe->dirty |= SP_NEW_SAMPLER; +} + + +void +softpipe_set_sampler_textures(struct pipe_context *pipe, +                              unsigned num, struct pipe_texture **texture) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); +   uint i; + +   assert(num <= PIPE_MAX_SAMPLERS); + +   /* Check for no-op */ +   if (num == softpipe->num_textures && +       !memcmp(softpipe->texture, texture, num * sizeof(struct pipe_texture *))) +      return; + +   draw_flush(softpipe->draw); + +   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { +      struct pipe_texture *tex = i < num ? 
texture[i] : NULL; + +      pipe_texture_reference(&softpipe->texture[i], tex); +      sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); +   } + +   softpipe->num_textures = num; + +   softpipe->dirty |= SP_NEW_TEXTURE; +} + + +void +softpipe_delete_sampler_state(struct pipe_context *pipe, +                              void *sampler) +{ +   FREE( sampler ); +} + + + diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c new file mode 100644 index 0000000000..1493c65884 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -0,0 +1,130 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ +#include "pipe/p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" + +#include "draw/draw_context.h" + + +/** + * XXX this might get moved someday + * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. + * Here, we flush the old surfaces and update the tile cache to point to the new + * surfaces. + */ +void +softpipe_set_framebuffer_state(struct pipe_context *pipe, +                               const struct pipe_framebuffer_state *fb) +{ +   struct softpipe_context *sp = softpipe_context(pipe); +   uint i; + +   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { +      /* check if changing cbuf */ +      if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { +         /* flush old */ +         sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + +         /* assign new */ +         sp->framebuffer.cbufs[i] = fb->cbufs[i]; + +         /* update cache */ +         sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); +      } +   } + +   sp->framebuffer.nr_cbufs = fb->nr_cbufs; + +   /* zbuf changing? */ +   if (sp->framebuffer.zsbuf != fb->zsbuf) { +      /* flush old */ +      sp_flush_tile_cache(sp, sp->zsbuf_cache); + +      /* assign new */ +      sp->framebuffer.zsbuf = fb->zsbuf; + +      /* update cache */ +      sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); +   } + +#if 0 +   /* XXX combined depth/stencil here */ + +   /* sbuf changing? 
*/ +   if (sp->framebuffer.sbuf != fb->sbuf) { +      /* flush old */ +      sp_flush_tile_cache(sp, sp->sbuf_cache_sep); + +      /* assign new */ +      sp->framebuffer.sbuf = fb->sbuf; + +      /* update cache */ +      if (fb->sbuf != fb->zbuf) { +         /* separate stencil buf */ +         sp->sbuf_cache = sp->sbuf_cache_sep; +         sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); +      } +      else { +         /* combined depth/stencil */ +         sp->sbuf_cache = sp->zbuf_cache; +         sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); +      } +   } +#endif + +   /* Tell draw module how deep the Z/depth buffer is */ +   { +      int depth_bits; +      double mrd; +      if (sp->framebuffer.zsbuf) { +         depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format, +                                            PIPE_FORMAT_COMP_Z); +      } +      else { +         depth_bits = 0; +      } +      if (depth_bits > 16) { +         mrd = 0.0000001; +      } +      else { +         mrd = 0.00002; +      } +      draw_set_mrd(sp->draw, mrd); +   } + +   sp->framebuffer.width = fb->width; +   sp->framebuffer.height = fb->height; + +   sp->dirty |= SP_NEW_FRAMEBUFFER; +} diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c new file mode 100644 index 0000000000..46b6991195 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -0,0 +1,73 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" + +#include "draw/draw_context.h" + + +void +softpipe_set_vertex_elements(struct pipe_context *pipe, +                             unsigned count, +                             const struct pipe_vertex_element *attribs) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   assert(count <= PIPE_MAX_ATTRIBS); + +   memcpy(softpipe->vertex_element, attribs, +          count * sizeof(struct pipe_vertex_element)); +   softpipe->num_vertex_elements = count; + +   softpipe->dirty |= SP_NEW_VERTEX; + +   draw_set_vertex_elements(softpipe->draw, count, attribs); +} + + +void +softpipe_set_vertex_buffers(struct pipe_context *pipe, +                            unsigned count, +                            const struct pipe_vertex_buffer *buffers) +{ +   struct softpipe_context *softpipe = softpipe_context(pipe); + +   assert(count <= PIPE_MAX_ATTRIBS); + +   memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); +   softpipe->num_vertex_buffers = count; + +   softpipe->dirty |= SP_NEW_VERTEX; + +   draw_set_vertex_buffers(softpipe->draw, count, buffers); +} diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c new file mode 100644 index 0000000000..6ade732698 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -0,0 +1,38 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#include "util/u_rect.h" +#include "sp_context.h" + + + +void +sp_init_surface_functions(struct softpipe_context *sp) +{ +   sp->pipe.surface_copy = util_surface_copy; +   sp->pipe.surface_fill = util_surface_fill; +} diff --git a/src/gallium/drivers/softpipe/sp_surface.h b/src/gallium/drivers/softpipe/sp_surface.h new file mode 100644 index 0000000000..22de3ba43f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* Authors:  Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SURFACE_H +#define SP_SURFACE_H + + +struct softpipe_context; + + +extern void +sp_init_surface_functions(struct softpipe_context *sp); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c new file mode 100644 index 0000000000..32aa5025e4 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -0,0 +1,1229 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2008 VMware, Inc.  All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +/** + * Texture sampling + * + * Authors: + *   Brian Paul + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" +#include "sp_tile_cache.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + + +/* + * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes + * see 1-pixel bands of improperly weighted linear-filtered textures. + * The tests/texwrap.c demo is a good test. + * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. + * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). + */ +#define FRAC(f)  ((f) - util_ifloor(f)) + + +/** + * Linear interpolation macro + */ +static INLINE float +lerp(float a, float v0, float v1) +{ +   return v0 + a * (v1 - v0); +} + + +/** + * Do 2D/biliner interpolation of float values. + * v00, v10, v01 and v11 are typically four texture samples in a square/box. + * a and b are the horizontal and vertical interpolants. + * It's important that this function is inlined when compiled with + * optimization!  If we find that's not true on some systems, convert + * to a macro. + */ +static INLINE float +lerp_2d(float a, float b, +        float v00, float v10, float v01, float v11) +{ +   const float temp0 = lerp(a, v00, v10); +   const float temp1 = lerp(a, v01, v11); +   return lerp(b, temp0, temp1); +} + + +/** + * As above, but 3D interpolation of 8 values. + */ +static INLINE float +lerp_3d(float a, float b, float c, +        float v000, float v100, float v010, float v110, +        float v001, float v101, float v011, float v111) +{ +   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); +   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); +   return lerp(c, temp0, temp1); +} + + + +/** + * If A is a signed integer, A % B doesn't give the right value for A < 0 + * (in terms of texture repeat).  Just casting to unsigned fixes that. + */ +#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) + + +/** + * Apply texture coord wrapping mode and return integer texture indexes + * for a vector of four texcoords (S or T or P). 
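[Editorial sketch, not part of the original commit] lerp() and lerp_2d() above implement standard linear and bilinear blending: lerp_2d(a, b, ...) first blends the bottom and top texel pairs horizontally with weight a, then blends those two results vertically with weight b. A standalone numeric check (a = 0.25, b = 0.5 over texel values 0..3); ex_lerp simply mirrors lerp() so the snippet compiles on its own.

#include <stdio.h>

static float ex_lerp(float a, float v0, float v1)
{
   return v0 + a * (v1 - v0);
}

int main(void)
{
   const float v00 = 0.0f, v10 = 1.0f, v01 = 2.0f, v11 = 3.0f;
   const float a = 0.25f, b = 0.5f;

   const float bottom = ex_lerp(a, v00, v10);    /* 0.25 */
   const float top    = ex_lerp(a, v01, v11);    /* 2.25 */
   const float result = ex_lerp(b, bottom, top); /* 1.25 */

   printf("bilinear result = %.2f\n", result);
   return 0;
}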
+ * \param wrapMode  PIPE_TEX_WRAP_x + * \param s  the incoming texcoords + * \param size  the texture image size + * \param icoord  returns the integer texcoords + * \return  integer texture index + */ +static INLINE void +nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, +                   int icoord[4]) +{ +   uint ch; +   switch (wrapMode) { +   case PIPE_TEX_WRAP_REPEAT: +      /* s limited to [0,1) */ +      /* i limited to [0,size-1] */ +      for (ch = 0; ch < 4; ch++) { +         int i = util_ifloor(s[ch] * size); +         icoord[ch] = REMAINDER(i, size); +      } +      return; +   case PIPE_TEX_WRAP_CLAMP: +      /* s limited to [0,1] */ +      /* i limited to [0,size-1] */ +      for (ch = 0; ch < 4; ch++) { +         if (s[ch] <= 0.0F) +            icoord[ch] = 0; +         else if (s[ch] >= 1.0F) +            icoord[ch] = size - 1; +         else +            icoord[ch] = util_ifloor(s[ch] * size); +      } +      return; +   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +      { +         /* s limited to [min,max] */ +         /* i limited to [0, size-1] */ +         const float min = 1.0F / (2.0F * size); +         const float max = 1.0F - min; +         for (ch = 0; ch < 4; ch++) { +            if (s[ch] < min) +               icoord[ch] = 0; +            else if (s[ch] > max) +               icoord[ch] = size - 1; +            else +               icoord[ch] = util_ifloor(s[ch] * size); +         } +      } +      return; +   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +      { +         /* s limited to [min,max] */ +         /* i limited to [-1, size] */ +         const float min = -1.0F / (2.0F * size); +         const float max = 1.0F - min; +         for (ch = 0; ch < 4; ch++) { +            if (s[ch] <= min) +               icoord[ch] = -1; +            else if (s[ch] >= max) +               icoord[ch] = size; +            else +               icoord[ch] = util_ifloor(s[ch] * size); +         } +      } +      return; +   case PIPE_TEX_WRAP_MIRROR_REPEAT: +      { +         const float min = 1.0F / (2.0F * size); +         const float max = 1.0F - min; +         for (ch = 0; ch < 4; ch++) { +            const int flr = util_ifloor(s[ch]); +            float u; +            if (flr & 1) +               u = 1.0F - (s[ch] - (float) flr); +            else +               u = s[ch] - (float) flr; +            if (u < min) +               icoord[ch] = 0; +            else if (u > max) +               icoord[ch] = size - 1; +            else +               icoord[ch] = util_ifloor(u * size); +         } +      } +      return; +   case PIPE_TEX_WRAP_MIRROR_CLAMP: +      for (ch = 0; ch < 4; ch++) { +         /* s limited to [0,1] */ +         /* i limited to [0,size-1] */ +         const float u = fabsf(s[ch]); +         if (u <= 0.0F) +            icoord[ch] = 0; +         else if (u >= 1.0F) +            icoord[ch] = size - 1; +         else +            icoord[ch] = util_ifloor(u * size); +      } +      return; +   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +      { +         /* s limited to [min,max] */ +         /* i limited to [0, size-1] */ +         const float min = 1.0F / (2.0F * size); +         const float max = 1.0F - min; +         for (ch = 0; ch < 4; ch++) { +            const float u = fabsf(s[ch]); +            if (u < min) +               icoord[ch] = 0; +            else if (u > max) +               icoord[ch] = size - 1; +            else +               icoord[ch] = util_ifloor(u * size); +         } +      } +      return; +   case 
PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +      { +         /* s limited to [min,max] */ +         /* i limited to [0, size-1] */ +         const float min = -1.0F / (2.0F * size); +         const float max = 1.0F - min; +         for (ch = 0; ch < 4; ch++) { +            const float u = fabsf(s[ch]); +            if (u < min) +               icoord[ch] = -1; +            else if (u > max) +               icoord[ch] = size; +            else +               icoord[ch] = util_ifloor(u * size); +         } +      } +      return; +   default: +      assert(0); +   } +} + + +/** + * Used to compute texel locations for linear sampling for four texcoords. + * \param wrapMode  PIPE_TEX_WRAP_x + * \param s  the texcoords + * \param size  the texture image size + * \param icoord0  returns first texture indexes + * \param icoord1  returns second texture indexes (usually icoord0 + 1) + * \param w  returns blend factor/weight between texture indexes + * \param icoord  returns the computed integer texture coords + */ +static INLINE void +linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, +                  int icoord0[4], int icoord1[4], float w[4]) +{ +   uint ch; + +   switch (wrapMode) { +   case PIPE_TEX_WRAP_REPEAT: +      for (ch = 0; ch < 4; ch++) { +         float u = s[ch] * size - 0.5F; +         icoord0[ch] = REMAINDER(util_ifloor(u), size); +         icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); +         w[ch] = FRAC(u); +      } +      break;; +   case PIPE_TEX_WRAP_CLAMP: +      for (ch = 0; ch < 4; ch++) { +         float u = CLAMP(s[ch], 0.0F, 1.0F); +         u = u * size - 0.5f; +         icoord0[ch] = util_ifloor(u); +         icoord1[ch] = icoord0[ch] + 1; +         w[ch] = FRAC(u); +      } +      break;; +   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +      for (ch = 0; ch < 4; ch++) { +         float u = CLAMP(s[ch], 0.0F, 1.0F); +         u = u * size - 0.5f; +         icoord0[ch] = util_ifloor(u); +         icoord1[ch] = icoord0[ch] + 1; +         if (icoord0[ch] < 0) +            icoord0[ch] = 0; +         if (icoord1[ch] >= (int) size) +            icoord1[ch] = size - 1; +         w[ch] = FRAC(u); +      } +      break;; +   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +      { +         const float min = -1.0F / (2.0F * size); +         const float max = 1.0F - min; +         for (ch = 0; ch < 4; ch++) { +            float u = CLAMP(s[ch], min, max); +            u = u * size - 0.5f; +            icoord0[ch] = util_ifloor(u); +            icoord1[ch] = icoord0[ch] + 1; +            w[ch] = FRAC(u); +         } +      } +      break;; +   case PIPE_TEX_WRAP_MIRROR_REPEAT: +      for (ch = 0; ch < 4; ch++) { +         const int flr = util_ifloor(s[ch]); +         float u; +         if (flr & 1) +            u = 1.0F - (s[ch] - (float) flr); +         else +            u = s[ch] - (float) flr; +         u = u * size - 0.5F; +         icoord0[ch] = util_ifloor(u); +         icoord1[ch] = icoord0[ch] + 1; +         if (icoord0[ch] < 0) +            icoord0[ch] = 0; +         if (icoord1[ch] >= (int) size) +            icoord1[ch] = size - 1; +         w[ch] = FRAC(u); +      } +      break;; +   case PIPE_TEX_WRAP_MIRROR_CLAMP: +      for (ch = 0; ch < 4; ch++) { +         float u = fabsf(s[ch]); +         if (u >= 1.0F) +            u = (float) size; +         else +            u *= size; +         u -= 0.5F; +         icoord0[ch] = util_ifloor(u); +         icoord1[ch] = icoord0[ch] + 1; +         w[ch] = FRAC(u); +      } +      break;; +   case 
PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +      for (ch = 0; ch < 4; ch++) { +         float u = fabsf(s[ch]); +         if (u >= 1.0F) +            u = (float) size; +         else +            u *= size; +         u -= 0.5F; +         icoord0[ch] = util_ifloor(u); +         icoord1[ch] = icoord0[ch] + 1; +         if (icoord0[ch] < 0) +            icoord0[ch] = 0; +         if (icoord1[ch] >= (int) size) +            icoord1[ch] = size - 1; +         w[ch] = FRAC(u); +      } +      break;; +   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +      { +         const float min = -1.0F / (2.0F * size); +         const float max = 1.0F - min; +         for (ch = 0; ch < 4; ch++) { +            float u = fabsf(s[ch]); +            if (u <= min) +               u = min * size; +            else if (u >= max) +               u = max * size; +            else +               u *= size; +            u -= 0.5F; +            icoord0[ch] = util_ifloor(u); +            icoord1[ch] = icoord0[ch] + 1; +            w[ch] = FRAC(u); +         } +      } +      break;; +   default: +      assert(0); +   } +} + + +/** + * For RECT textures / unnormalized texcoords + * Only a subset of wrap modes supported. + */ +static INLINE void +nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, +                          int icoord[4]) +{ +   uint ch; +   switch (wrapMode) { +   case PIPE_TEX_WRAP_CLAMP: +      for (ch = 0; ch < 4; ch++) { +         int i = util_ifloor(s[ch]); +         icoord[ch]= CLAMP(i, 0, (int) size-1); +      } +      return; +   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +      /* fall-through */ +   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +      for (ch = 0; ch < 4; ch++) { +         icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); +      } +      return; +   default: +      assert(0); +   } +} + + +/** + * For RECT textures / unnormalized texcoords. + * Only a subset of wrap modes supported. 
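[Editorial sketch, not part of the original commit] For normalized coordinates, linear_texcoord_4() above centers the sample between two texels: u = s * size - 0.5, the two indices are floor(u) and floor(u) + 1 (wrapped according to the wrap mode), and w = FRAC(u) is the blend weight. A standalone check for PIPE_TEX_WRAP_REPEAT; the coordinate is positive here, so a plain modulo stands in for the unsigned-cast REMAINDER macro.

#include <stdio.h>
#include <math.h>

int main(void)
{
   const unsigned size = 8;
   const float s = 1.30f;                     /* repeats past 1.0 */
   const float u = s * size - 0.5f;           /* 9.9 */
   const int   i = (int) floorf(u);           /* 9 */
   const unsigned i0 = (unsigned) i % size;   /* 1 */
   const unsigned i1 = (i0 + 1) % size;       /* 2 */
   const float w = u - floorf(u);             /* 0.9: weight toward texel i1 */

   printf("texels %u and %u, weight %.2f\n", i0, i1, w);
   return 0;
}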
+ */ +static INLINE void +linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, +                         int icoord0[4], int icoord1[4], float w[4]) +{ +   uint ch; +   switch (wrapMode) { +   case PIPE_TEX_WRAP_CLAMP: +      for (ch = 0; ch < 4; ch++) { +         /* Not exactly what the spec says, but it matches NVIDIA output */ +         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); +         icoord0[ch] = util_ifloor(u); +         icoord1[ch] = icoord0[ch] + 1; +         w[ch] = FRAC(u); +      } +      return; +   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +      /* fall-through */ +   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +      for (ch = 0; ch < 4; ch++) { +         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); +         u -= 0.5F; +         icoord0[ch] = util_ifloor(u); +         icoord1[ch] = icoord0[ch] + 1; +         if (icoord1[ch] > (int) size - 1) +            icoord1[ch] = size - 1; +         w[ch] = FRAC(u); +      } +      break; +   default: +      assert(0); +   } +} + + +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ +   /* +      major axis +      direction     target                             sc     tc    ma +      ----------    -------------------------------    ---    ---   --- +       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx +       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx +       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry +       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry +       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz +       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz +   */ +   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); +   unsigned face; +   float sc, tc, ma; + +   if (arx > ary && arx > arz) { +      if (rx >= 0.0F) { +         face = PIPE_TEX_FACE_POS_X; +         sc = -rz; +         tc = -ry; +         ma = arx; +      } +      else { +         face = PIPE_TEX_FACE_NEG_X; +         sc = rz; +         tc = -ry; +         ma = arx; +      } +   } +   else if (ary > arx && ary > arz) { +      if (ry >= 0.0F) { +         face = PIPE_TEX_FACE_POS_Y; +         sc = rx; +         tc = rz; +         ma = ary; +      } +      else { +         face = PIPE_TEX_FACE_NEG_Y; +         sc = rx; +         tc = -rz; +         ma = ary; +      } +   } +   else { +      if (rz > 0.0F) { +         face = PIPE_TEX_FACE_POS_Z; +         sc = rx; +         tc = -ry; +         ma = arz; +      } +      else { +         face = PIPE_TEX_FACE_NEG_Z; +         sc = -rx; +         tc = -ry; +         ma = arz; +      } +   } + +   *newS = ( sc / ma + 1.0F ) * 0.5F; +   *newT = ( tc / ma + 1.0F ) * 0.5F; + +   return face; +} + + +/** + * Examine the quad's texture coordinates to compute the partial + * derivatives w.r.t X and Y, then compute lambda (level of detail). + * + * This is only done for fragment shaders, not vertex shaders. 
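[Editorial sketch, not part of the original commit] compute_lambda() below turns per-quad texcoord deltas into a level-of-detail value: rho is the largest texel-space step per pixel (taken across s, t and p, each scaled by the base-level size), and lambda = log2(rho) plus the shader and sampler LOD biases, clamped to [min_lod, max_lod]. A standalone numeric check with only the s axis and the biases/clamping omitted.

#include <stdio.h>
#include <math.h>

int main(void)
{
   /* one-pixel steps across the quad in texcoord space */
   const float dsdx = 1.0f / 64.0f, dsdy = 0.0f;
   const unsigned width = 256;                /* base mipmap level width */

   const float rho    = fmaxf(fabsf(dsdx), fabsf(dsdy)) * width;  /* 4 texels/pixel */
   const float lambda = log2f(rho);                               /* 2.0 -> level 2 */

   printf("rho = %.1f, lambda = %.1f\n", rho, lambda);
   return 0;
}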
+ */ +static float +compute_lambda(const struct pipe_texture *tex, +               const struct pipe_sampler_state *sampler, +               const float s[QUAD_SIZE], +               const float t[QUAD_SIZE], +               const float p[QUAD_SIZE], +               float lodbias) +{ +   float rho, lambda; + +   assert(sampler->normalized_coords); + +   assert(s); +   { +      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; +      float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]; +      dsdx = fabsf(dsdx); +      dsdy = fabsf(dsdy); +      rho = MAX2(dsdx, dsdy) * tex->width[0]; +   } +   if (t) { +      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; +      float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]; +      float max; +      dtdx = fabsf(dtdx); +      dtdy = fabsf(dtdy); +      max = MAX2(dtdx, dtdy) * tex->height[0]; +      rho = MAX2(rho, max); +   } +   if (p) { +      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; +      float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]; +      float max; +      dpdx = fabsf(dpdx); +      dpdy = fabsf(dpdy); +      max = MAX2(dpdx, dpdy) * tex->depth[0]; +      rho = MAX2(rho, max); +   } + +   lambda = util_fast_log2(rho); +   lambda += lodbias + sampler->lod_bias; +   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); + +   return lambda; +} + + +/** + * Do several things here: + * 1. Compute lambda from the texcoords, if needed + * 2. Determine if we're minifying or magnifying + * 3. If minifying, choose mipmap levels + * 4. Return image filter to use within mipmap images + * \param level0  Returns first mipmap level to sample from + * \param level1  Returns second mipmap level to sample from + * \param levelBlend  Returns blend factor between levels, in [0,1] + * \param imgFilter  Returns either the min or mag filter, depending on lambda + */ +static void +choose_mipmap_levels(const struct pipe_texture *texture, +                     const struct pipe_sampler_state *sampler, +                     const float s[QUAD_SIZE], +                     const float t[QUAD_SIZE], +                     const float p[QUAD_SIZE], +                     boolean computeLambda, +                     float lodbias, +                     unsigned *level0, unsigned *level1, float *levelBlend, +                     unsigned *imgFilter) +{ +   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { +      /* no mipmap selection needed */ +      *level0 = *level1 = CLAMP((int) sampler->min_lod, +                                0, (int) texture->last_level); + +      if (sampler->min_img_filter != sampler->mag_img_filter) { +         /* non-mipmapped texture, but still need to determine if doing +          * minification or magnification. 
+          */ +         float lambda = compute_lambda(texture, sampler, s, t, p, lodbias); +         if (lambda <= 0.0) { +            *imgFilter = sampler->mag_img_filter; +         } +         else { +            *imgFilter = sampler->min_img_filter; +         } +      } +      else { +         *imgFilter = sampler->mag_img_filter; +      } +   } +   else { +      float lambda; + +      if (computeLambda) +         /* fragment shader */ +         lambda = compute_lambda(texture, sampler, s, t, p, lodbias); +      else +         /* vertex shader */ +         lambda = lodbias; /* not really a bias, but absolute LOD */ + +      if (lambda <= 0.0) { /* XXX threshold depends on the filter */ +         /* magnifying */ +         *imgFilter = sampler->mag_img_filter; +         *level0 = *level1 = 0; +      } +      else { +         /* minifying */ +         *imgFilter = sampler->min_img_filter; + +         /* choose mipmap level(s) and compute the blend factor between them */ +         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { +            /* Nearest mipmap level */ +            const int lvl = (int) (lambda + 0.5); +            *level0 = +            *level1 = CLAMP(lvl, 0, (int) texture->last_level); +         } +         else { +            /* Linear interpolation between mipmap levels */ +            const int lvl = (int) lambda; +            *level0 = CLAMP(lvl,     0, (int) texture->last_level); +            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); +            *levelBlend = FRAC(lambda);  /* blending weight between levels */ +         } +      } +   } +} + + +/** + * Get a texel from a texture, using the texture tile cache. + * + * \param face  the cube face in 0..5 + * \param level  the mipmap level + * \param x  the x coord of texel within 2D image + * \param y  the y coord of texel within 2D image + * \param z  which slice of a 3D texture + * \param rgba  the quad to put the texel/color into + * \param j  which element of the rgba quad to write to + * + * XXX maybe move this into sp_tile_cache.c and merge with the + * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1... 
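[Editorial sketch, not part of the original commit] choose_mipmap_levels() above maps lambda to concrete levels: with PIPE_TEX_MIPFILTER_NEAREST a single rounded level is used, while PIPE_TEX_MIPFILTER_LINEAR picks floor(lambda) and the next level, keeping FRAC(lambda) as the blend weight that sp_get_samples_2d_common() later uses to lerp the two filtered results. A standalone check for lambda = 1.3 (clamping shown only against last_level).

#include <stdio.h>
#include <math.h>

int main(void)
{
   const float lambda = 1.3f;       /* from compute_lambda() */
   const unsigned last_level = 9;   /* e.g. a 512x512 texture */

   /* PIPE_TEX_MIPFILTER_NEAREST: single rounded level */
   const unsigned nearest = (unsigned) (lambda + 0.5f);                     /* 1 */

   /* PIPE_TEX_MIPFILTER_LINEAR: two levels plus a blend weight */
   const unsigned level0 = (unsigned) lambda;                               /* 1 */
   const unsigned level1 = (level0 + 1 > last_level) ? last_level
                                                     : level0 + 1;          /* 2 */
   const float    blend  = lambda - floorf(lambda);                         /* 0.3 */

   printf("nearest=%u  linear=%u/%u blend=%.1f\n", nearest, level0, level1, blend);
   return 0;
}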
+ */ +static void +get_texel(const struct tgsi_sampler *tgsi_sampler, +          unsigned face, unsigned level, int x, int y, int z, +          float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +{ +   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); +   struct softpipe_context *sp = samp->sp; +   const uint unit = samp->unit; +   const struct pipe_texture *texture = sp->texture[unit]; +   const struct pipe_sampler_state *sampler = sp->sampler[unit]; + +   if (x < 0 || x >= (int) texture->width[level] || +       y < 0 || y >= (int) texture->height[level] || +       z < 0 || z >= (int) texture->depth[level]) { +      rgba[0][j] = sampler->border_color[0]; +      rgba[1][j] = sampler->border_color[1]; +      rgba[2][j] = sampler->border_color[2]; +      rgba[3][j] = sampler->border_color[3]; +   } +   else { +      const int tx = x % TILE_SIZE; +      const int ty = y % TILE_SIZE; +      const struct softpipe_cached_tile *tile +         = sp_get_cached_tile_tex(sp, samp->cache, +                                  x, y, z, face, level); +      rgba[0][j] = tile->data.color[ty][tx][0]; +      rgba[1][j] = tile->data.color[ty][tx][1]; +      rgba[2][j] = tile->data.color[ty][tx][2]; +      rgba[3][j] = tile->data.color[ty][tx][3]; +      if (0) +      { +         debug_printf("Get texel %f %f %f %f from %s\n", +                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], +                      pf_name(texture->format)); +      } +   } +} + + +/** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels.  We look at the red channel here. + */ +static INLINE void +shadow_compare(uint compare_func, +               float rgba[NUM_CHANNELS][QUAD_SIZE], +               const float p[QUAD_SIZE], +               uint j) +{ +   int k; +   switch (compare_func) { +   case PIPE_FUNC_LESS: +      k = p[j] < rgba[0][j]; +      break; +   case PIPE_FUNC_LEQUAL: +      k = p[j] <= rgba[0][j]; +      break; +   case PIPE_FUNC_GREATER: +      k = p[j] > rgba[0][j]; +      break; +   case PIPE_FUNC_GEQUAL: +      k = p[j] >= rgba[0][j]; +      break; +   case PIPE_FUNC_EQUAL: +      k = p[j] == rgba[0][j]; +      break; +   case PIPE_FUNC_NOTEQUAL: +      k = p[j] != rgba[0][j]; +      break; +   case PIPE_FUNC_ALWAYS: +      k = 1; +      break; +   case PIPE_FUNC_NEVER: +      k = 0; +      break; +   default: +      k = 0; +      assert(0); +      break; +   } + +   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; +} + + +/** + * Common code for sampling 1D/2D/cube textures. + * Could probably extend for 3D... 
+ */ +static void +sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, +                         const float s[QUAD_SIZE], +                         const float t[QUAD_SIZE], +                         const float p[QUAD_SIZE], +                         boolean computeLambda, +                         float lodbias, +                         float rgba[NUM_CHANNELS][QUAD_SIZE], +                         const unsigned faces[4]) +{ +   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); +   const struct softpipe_context *sp = samp->sp; +   const uint unit = samp->unit; +   const struct pipe_texture *texture = sp->texture[unit]; +   const struct pipe_sampler_state *sampler = sp->sampler[unit]; +   const uint compare_func = sampler->compare_func; +   unsigned level0, level1, j, imgFilter; +   int width, height; +   float levelBlend; + +   choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, +                        &level0, &level1, &levelBlend, &imgFilter); + +   assert(sampler->normalized_coords); + +   width = texture->width[level0]; +   height = texture->height[level0]; + +   assert(width > 0); + +   switch (imgFilter) { +   case PIPE_TEX_FILTER_NEAREST: +      { +         int x[4], y[4]; +         nearest_texcoord_4(sampler->wrap_s, s, width, x); +         nearest_texcoord_4(sampler->wrap_t, t, height, y); + +         for (j = 0; j < QUAD_SIZE; j++) { +            get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); +            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +               shadow_compare(compare_func, rgba, p, j); +            } + +            if (level0 != level1) { +               /* get texels from second mipmap level and blend */ +               float rgba2[4][4]; +               unsigned c; +               x[j] /= 2; +               y[j] /= 2; +               get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, +                         rgba2, j); +               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ +                  shadow_compare(compare_func, rgba2, p, j); +               } + +               for (c = 0; c < NUM_CHANNELS; c++) { +                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); +               } +            } +         } +      } +      break; +   case PIPE_TEX_FILTER_LINEAR: +   case PIPE_TEX_FILTER_ANISO: +      { +         int x0[4], y0[4], x1[4], y1[4]; +         float xw[4], yw[4]; /* weights */ + +         linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); +         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + +         for (j = 0; j < QUAD_SIZE; j++) { +            float tx[4][4]; /* texels */ +            int c; +            get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); +            get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); +            get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); +            get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); +            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +               shadow_compare(compare_func, tx, p, 0); +               shadow_compare(compare_func, tx, p, 1); +               shadow_compare(compare_func, tx, p, 2); +               shadow_compare(compare_func, tx, p, 3); +            } + +            /* interpolate R, G, B, A */ +            for (c = 0; c < 4; c++) { +               rgba[c][j] = lerp_2d(xw[j], yw[j], +                                  
  tx[c][0], tx[c][1], +                                    tx[c][2], tx[c][3]); +            } + +            if (level0 != level1) { +               /* get texels from second mipmap level and blend */ +               float rgba2[4][4]; +               x0[j] /= 2; +               y0[j] /= 2; +               x1[j] /= 2; +               y1[j] /= 2; +               get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); +               get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); +               get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); +               get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); +               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ +                  shadow_compare(compare_func, tx, p, 0); +                  shadow_compare(compare_func, tx, p, 1); +                  shadow_compare(compare_func, tx, p, 2); +                  shadow_compare(compare_func, tx, p, 3); +               } + +               /* interpolate R, G, B, A */ +               for (c = 0; c < 4; c++) { +                  rgba2[c][j] = lerp_2d(xw[j], yw[j], +                                        tx[c][0], tx[c][1], tx[c][2], tx[c][3]); +               } + +               for (c = 0; c < NUM_CHANNELS; c++) { +                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); +               } +            } +         } +      } +      break; +   default: +      assert(0); +   } +} + + +static INLINE void +sp_get_samples_1d(const struct tgsi_sampler *sampler, +                  const float s[QUAD_SIZE], +                  const float t[QUAD_SIZE], +                  const float p[QUAD_SIZE], +                  boolean computeLambda, +                  float lodbias, +                  float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   static const unsigned faces[4] = {0, 0, 0, 0}; +   static const float tzero[4] = {0, 0, 0, 0}; +   sp_get_samples_2d_common(sampler, s, tzero, NULL, +                            computeLambda, lodbias, rgba, faces); +} + + +static INLINE void +sp_get_samples_2d(const struct tgsi_sampler *sampler, +                  const float s[QUAD_SIZE], +                  const float t[QUAD_SIZE], +                  const float p[QUAD_SIZE], +                  boolean computeLambda, +                  float lodbias, +                  float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   static const unsigned faces[4] = {0, 0, 0, 0}; +   sp_get_samples_2d_common(sampler, s, t, p, +                            computeLambda, lodbias, rgba, faces); +} + + +static INLINE void +sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, +                  const float s[QUAD_SIZE], +                  const float t[QUAD_SIZE], +                  const float p[QUAD_SIZE], +                  boolean computeLambda, +                  float lodbias, +                  float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); +   const struct softpipe_context *sp = samp->sp; +   const uint unit = samp->unit; +   const struct pipe_texture *texture = sp->texture[unit]; +   const struct pipe_sampler_state *sampler = sp->sampler[unit]; +   /* get/map pipe_surfaces corresponding to 3D tex slices */ +   unsigned level0, level1, j, imgFilter; +   int width, height, depth; +   float levelBlend; +   const uint face = 0; + +   choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, +                        &level0, &level1, &levelBlend, &imgFilter); + 
+   assert(sampler->normalized_coords); + +   width = texture->width[level0]; +   height = texture->height[level0]; +   depth = texture->depth[level0]; + +   assert(width > 0); +   assert(height > 0); +   assert(depth > 0); + +   switch (imgFilter) { +   case PIPE_TEX_FILTER_NEAREST: +      { +         int x[4], y[4], z[4]; +         nearest_texcoord_4(sampler->wrap_s, s, width, x); +         nearest_texcoord_4(sampler->wrap_t, t, height, y); +         nearest_texcoord_4(sampler->wrap_r, p, depth, z); +         for (j = 0; j < QUAD_SIZE; j++) { +            get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); +            if (level0 != level1) { +               /* get texels from second mipmap level and blend */ +               float rgba2[4][4]; +               unsigned c; +               x[j] /= 2; +               y[j] /= 2; +               z[j] /= 2; +               get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); +               for (c = 0; c < NUM_CHANNELS; c++) { +                  rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); +               } +            } +         } +      } +      break; +   case PIPE_TEX_FILTER_LINEAR: +   case PIPE_TEX_FILTER_ANISO: +      { +         int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; +         float xw[4], yw[4], zw[4]; /* interpolation weights */ +         linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw); +         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); +         linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw); + +         for (j = 0; j < QUAD_SIZE; j++) { +            int c; +            float tx0[4][4], tx1[4][4]; +            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); +            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); +            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); +            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); +            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); +            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); +            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); +            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); + +            /* interpolate R, G, B, A */ +            for (c = 0; c < 4; c++) { +               rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], +                                    tx0[c][0], tx0[c][1], +                                    tx0[c][2], tx0[c][3], +                                    tx1[c][0], tx1[c][1], +                                    tx1[c][2], tx1[c][3]); +            } + +            if (level0 != level1) { +               /* get texels from second mipmap level and blend */ +               float rgba2[4][4]; +               x0[j] /= 2; +               y0[j] /= 2; +               z0[j] /= 2; +               x1[j] /= 2; +               y1[j] /= 2; +               z1[j] /= 2; +               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); +               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); +               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); +               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); +               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); +               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); +               
get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); +               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); + +               /* interpolate R, G, B, A */ +               for (c = 0; c < 4; c++) { +                  rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], +                                        tx0[c][0], tx0[c][1], +                                        tx0[c][2], tx0[c][3], +                                        tx1[c][0], tx1[c][1], +                                        tx1[c][2], tx1[c][3]); +               } + +               /* blend mipmap levels */ +               for (c = 0; c < NUM_CHANNELS; c++) { +                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); +               } +            } +         } +      } +      break; +   default: +      assert(0); +   } +} + + +static void +sp_get_samples_cube(const struct tgsi_sampler *sampler, +                    const float s[QUAD_SIZE], +                    const float t[QUAD_SIZE], +                    const float p[QUAD_SIZE], +                    boolean computeLambda, +                    float lodbias, +                    float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   unsigned faces[QUAD_SIZE], j; +   float ssss[4], tttt[4]; +   for (j = 0; j < QUAD_SIZE; j++) { +      faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); +   } +   sp_get_samples_2d_common(sampler, ssss, tttt, NULL, +                            computeLambda, lodbias, rgba, faces); +} + + +static void +sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, +                    const float s[QUAD_SIZE], +                    const float t[QUAD_SIZE], +                    const float p[QUAD_SIZE], +                    boolean computeLambda, +                    float lodbias, +                    float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); +   const struct softpipe_context *sp = samp->sp; +   const uint unit = samp->unit; +   const struct pipe_texture *texture = sp->texture[unit]; +   const struct pipe_sampler_state *sampler = sp->sampler[unit]; +   const uint face = 0; +   const uint compare_func = sampler->compare_func; +   unsigned level0, level1, j, imgFilter; +   int width, height; +   float levelBlend; + +   choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, +                        &level0, &level1, &levelBlend, &imgFilter); + +   /* texture RECTS cannot be mipmapped */ +   assert(level0 == level1); + +   width = texture->width[level0]; +   height = texture->height[level0]; + +   assert(width > 0); + +   switch (imgFilter) { +   case PIPE_TEX_FILTER_NEAREST: +      { +         int x[4], y[4]; +         nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); +         nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); +         for (j = 0; j < QUAD_SIZE; j++) { +            get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); +            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +               shadow_compare(compare_func, rgba, p, j); +            } +         } +      } +      break; +   case PIPE_TEX_FILTER_LINEAR: +   case PIPE_TEX_FILTER_ANISO: +      { +         int x0[4], y0[4], x1[4], y1[4]; +         float xw[4], yw[4]; /* weights */ +         linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw); +         linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); +         for (j = 0; j < QUAD_SIZE; j++) { +           
 float tx[4][4]; /* texels */ +            int c; +            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); +            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); +            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); +            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); +            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +               shadow_compare(compare_func, tx, p, 0); +               shadow_compare(compare_func, tx, p, 1); +               shadow_compare(compare_func, tx, p, 2); +               shadow_compare(compare_func, tx, p, 3); +            } +            for (c = 0; c < 4; c++) { +               rgba[c][j] = lerp_2d(xw[j], yw[j], +                                    tx[c][0], tx[c][1], tx[c][2], tx[c][3]); +            } +         } +      } +      break; +   default: +      assert(0); +   } +} + + +/** + * Common code for vertex/fragment program texture sampling. + */ +static INLINE void +sp_get_samples(struct tgsi_sampler *tgsi_sampler, +               const float s[QUAD_SIZE], +               const float t[QUAD_SIZE], +               const float p[QUAD_SIZE], +               boolean computeLambda, +               float lodbias, +               float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); +   const struct softpipe_context *sp = samp->sp; +   const uint unit = samp->unit; +   const struct pipe_texture *texture = sp->texture[unit]; +   const struct pipe_sampler_state *sampler = sp->sampler[unit]; + +   if (!texture) +      return; + +   switch (texture->target) { +   case PIPE_TEXTURE_1D: +      assert(sampler->normalized_coords); +      sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); +      break; +   case PIPE_TEXTURE_2D: +      if (sampler->normalized_coords) +         sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); +      else +         sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); +      break; +   case PIPE_TEXTURE_3D: +      assert(sampler->normalized_coords); +      sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); +      break; +   case PIPE_TEXTURE_CUBE: +      assert(sampler->normalized_coords); +      sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); +      break; +   default: +      assert(0); +   } + +#if 0 /* DEBUG */ +   { +      int i; +      printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]); +      for (i = 0; i < 4; i++) { +         printf("Frag %d: %f %f %f %f\n", i, +                rgba[0][i], +                rgba[1][i], +                rgba[2][i], +                rgba[3][i]); +      } +   } +#endif +} + + +/** + * Called via tgsi_sampler::get_samples() when running a fragment shader. + * Get four filtered RGBA values from the sampler's texture. + */ +void +sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, +                        const float s[QUAD_SIZE], +                        const float t[QUAD_SIZE], +                        const float p[QUAD_SIZE], +                        float lodbias, +                        float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba); +} + + +/** + * Called via tgsi_sampler::get_samples() when running a vertex shader. + * Get four filtered RGBA values from the sampler's texture. 
+ */ +void +sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, +                      const float s[QUAD_SIZE], +                      const float t[QUAD_SIZE], +                      const float p[QUAD_SIZE], +                      float lodbias, +                      float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ +   sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba); +} diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h new file mode 100644 index 0000000000..40d8eb2c2a --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -0,0 +1,73 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SP_TEX_SAMPLE_H +#define SP_TEX_SAMPLE_H + + +#include "tgsi/tgsi_exec.h" + + +/** + * Subclass of tgsi_sampler + */ +struct sp_shader_sampler +{ +   struct tgsi_sampler base;  /**< base class */ + +   uint unit; +   struct softpipe_context *sp; +   struct softpipe_tile_cache *cache; +}; + + + +static INLINE const struct sp_shader_sampler * +sp_shader_sampler(const struct tgsi_sampler *sampler) +{ +   return (const struct sp_shader_sampler *) sampler; +} + + +extern void +sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, +                        const float s[QUAD_SIZE], +                        const float t[QUAD_SIZE], +                        const float p[QUAD_SIZE], +                        float lodbias, +                        float rgba[NUM_CHANNELS][QUAD_SIZE]); + +extern void +sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, +                      const float s[QUAD_SIZE], +                      const float t[QUAD_SIZE], +                      const float p[QUAD_SIZE], +                      float lodbias, +                      float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +#endif /* SP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c new file mode 100644 index 0000000000..3eed0d0d29 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -0,0 +1,348 @@ +/************************************************************************** + *  + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + /* +  * Authors: +  *   Keith Whitwell <keith@tungstengraphics.com> +  *   Michel Dänzer <michel@tungstengraphics.com> +  */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "sp_screen.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ +   return MAX2(1, d>>1); +} + + +/* Conventional allocation path for non-display textures: + */ +static boolean +softpipe_texture_layout(struct pipe_screen *screen, +                        struct softpipe_texture * spt) +{ +   struct pipe_winsys *ws = screen->winsys; +   struct pipe_texture *pt = &spt->base; +   unsigned level; +   unsigned width = pt->width[0]; +   unsigned height = pt->height[0]; +   unsigned depth = pt->depth[0]; + +   unsigned buffer_size = 0; + +   for (level = 0; level <= pt->last_level; level++) { +      pt->width[level] = width; +      pt->height[level] = height; +      pt->depth[level] = depth; +      pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);   +      pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);   +      spt->stride[level] = pt->nblocksx[level]*pt->block.size; + +      spt->level_offset[level] = buffer_size; + +      buffer_size += (pt->nblocksy[level] * +                      ((pt->target == PIPE_TEXTURE_CUBE) ? 
6 : depth) * +                      spt->stride[level]); + +      width  = minify(width); +      height = minify(height); +      depth = minify(depth); +   } + +   spt->buffer = ws->buffer_create(ws, 32, +                                   PIPE_BUFFER_USAGE_PIXEL, +                                   buffer_size); + +   return spt->buffer != NULL; +} + +static boolean +softpipe_displaytarget_layout(struct pipe_screen *screen, +                              struct softpipe_texture * spt) +{ +   struct pipe_winsys *ws = screen->winsys; +   unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE | +                     PIPE_BUFFER_USAGE_GPU_READ_WRITE); + +   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);   +   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);   + +   spt->buffer = ws->surface_buffer_create( ws,  +                                            spt->base.width[0],  +                                            spt->base.height[0], +                                            spt->base.format, +                                            usage, +                                            &spt->stride[0]); + +   return spt->buffer != NULL; +} + + + + + +static struct pipe_texture * +softpipe_texture_create(struct pipe_screen *screen, +                        const struct pipe_texture *templat) +{ +   struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); +   if (!spt) +      return NULL; + +   spt->base = *templat; +   spt->base.refcount = 1; +   spt->base.screen = screen; + +   if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { +      if (!softpipe_displaytarget_layout(screen, spt)) +         goto fail; +   } +   else { +      if (!softpipe_texture_layout(screen, spt)) +         goto fail; +   } +     +   assert(spt->base.refcount == 1); +   return &spt->base; + + fail: +   FREE(spt); +   return NULL; +} + + +static struct pipe_texture * +softpipe_texture_blanket(struct pipe_screen * screen, +                         const struct pipe_texture *base, +                         const unsigned *stride, +                         struct pipe_buffer *buffer) +{ +   struct softpipe_texture *spt; +   assert(screen); + +   /* Only supports one type */ +   if (base->target != PIPE_TEXTURE_2D || +       base->last_level != 0 || +       base->depth[0] != 1) { +      return NULL; +   } + +   spt = CALLOC_STRUCT(softpipe_texture); +   if (!spt) +      return NULL; + +   spt->base = *base; +   spt->base.refcount = 1; +   spt->base.screen = screen; +   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);   +   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);   +   spt->stride[0] = stride[0]; + +   pipe_buffer_reference(screen, &spt->buffer, buffer); + +   return &spt->base; +} + + +static void +softpipe_texture_release(struct pipe_screen *screen, +                         struct pipe_texture **pt) +{ +   if (!*pt) +      return; + +   if (--(*pt)->refcount <= 0) { +      struct softpipe_texture *spt = softpipe_texture(*pt); + +      pipe_buffer_reference(screen, &spt->buffer, NULL); +      FREE(spt); +   } +   *pt = NULL; +} + + +static struct pipe_surface * +softpipe_get_tex_surface(struct pipe_screen *screen, +                         struct pipe_texture *pt, +                         unsigned face, unsigned level, unsigned zslice, +                         unsigned usage) +{ +   struct softpipe_texture *spt = softpipe_texture(pt); +   struct pipe_surface *ps; + + 
  assert(level <= pt->last_level); + +   ps = CALLOC_STRUCT(pipe_surface); +   if (ps) { +      ps->refcount = 1; +      pipe_texture_reference(&ps->texture, pt); +      ps->format = pt->format; +      ps->block = pt->block; +      ps->width = pt->width[level]; +      ps->height = pt->height[level]; +      ps->nblocksx = pt->nblocksx[level]; +      ps->nblocksy = pt->nblocksy[level]; +      ps->stride = spt->stride[level]; +      ps->offset = spt->level_offset[level]; +      ps->usage = usage; + +      /* Because we are softpipe, anything that the state tracker +       * thought was going to be done with the GPU will actually get +       * done with the CPU.  Let's adjust the flags to take that into +       * account. +       */ +      if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) { +         /* GPU_WRITE means "render" and that can involve reads (blending) */ +         ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_CPU_READ; +      } + +      if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) +         ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; + +      if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | +                       PIPE_BUFFER_USAGE_GPU_WRITE)) { +         /* Mark the surface as dirty.  The tile cache will look for this. */ +         spt->modified = TRUE; +      } + +      ps->face = face; +      ps->level = level; +      ps->zslice = zslice; + +      if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { +         ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * +            ps->nblocksy * +            ps->stride; +      } +      else { +         assert(face == 0); +         assert(zslice == 0); +      } +   } +   return ps; +} + + +static void  +softpipe_tex_surface_release(struct pipe_screen *screen,  +                             struct pipe_surface **s) +{ +   struct pipe_surface *surf = *s; +   /* Effectively do the texture_update work here - if texture images +    * needed post-processing to put them into hardware layout, this is +    * where it would happen.  For softpipe, nothing to do. +    */ +   assert(surf->texture); +   if (--surf->refcount == 0) { +      pipe_texture_reference(&surf->texture, NULL); +      FREE(surf); +   } +   *s = NULL; +} + + +static void * +softpipe_surface_map( struct pipe_screen *screen, +                      struct pipe_surface *surface, +                      unsigned flags ) +{ +   ubyte *map; +   struct softpipe_texture *spt; + +   if (flags & ~surface->usage) { +      assert(0); +      return NULL; +   } + +   assert(surface->texture); +   spt = softpipe_texture(surface->texture); +   map = pipe_buffer_map(screen, spt->buffer, flags); +   if (map == NULL) +      return NULL; + +   /* May want to different things here depending on read/write nature +    * of the map: +    */ +   if (surface->texture && +       (flags & PIPE_BUFFER_USAGE_CPU_WRITE))  +   { +      /* Do something to notify sharing contexts of a texture change. +       * In softpipe, that would mean flushing the texture cache. 
+       */ +      softpipe_screen(screen)->timestamp++; +   } +    +   return map + surface->offset; +} + + +static void +softpipe_surface_unmap(struct pipe_screen *screen, +                       struct pipe_surface *surface) +{ +   struct softpipe_texture *spt; + +   assert(surface->texture); +   spt = softpipe_texture(surface->texture); + +   pipe_buffer_unmap( screen, spt->buffer ); +} + + +void +softpipe_init_texture_funcs(struct softpipe_context *sp) +{ +} + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ +   screen->texture_create = softpipe_texture_create; +   screen->texture_blanket = softpipe_texture_blanket; +   screen->texture_release = softpipe_texture_release; + +   screen->get_tex_surface = softpipe_get_tex_surface; +   screen->tex_surface_release = softpipe_tex_surface_release; + +   screen->surface_map = softpipe_surface_map; +   screen->surface_unmap = softpipe_surface_unmap; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h new file mode 100644 index 0000000000..c1636920cd --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -0,0 +1,70 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#ifndef SP_TEXTURE_H +#define SP_TEXTURE_H + + +#include "pipe/p_state.h" + + +struct pipe_context; +struct pipe_screen; +struct softpipe_context; + + +struct softpipe_texture +{ +   struct pipe_texture base; + +   unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; +   unsigned stride[PIPE_MAX_TEXTURE_LEVELS]; + +   /* The data is held here: +    */ +   struct pipe_buffer *buffer; + +   boolean modified; +}; + + +/** cast wrapper */ +static INLINE struct softpipe_texture * +softpipe_texture(struct pipe_texture *pt) +{ +   return (struct softpipe_texture *) pt; +} + + +extern void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ); + +extern void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c new file mode 100644 index 0000000000..ab76009375 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -0,0 +1,614 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/** + * Framebuffer/surface tile caching. + * + * Author: + *    Brian Paul + */ + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_tile.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" + +#define NUM_ENTRIES 32 + + +/** XXX move these */ +#define MAX_WIDTH 2048 +#define MAX_HEIGHT 2048 + + +struct softpipe_tile_cache +{ +   struct pipe_screen *screen; +   struct pipe_surface *surface;  /**< the surface we're caching */ +   void *surface_map; +   struct pipe_texture *texture;  /**< if caching a texture */ +   struct softpipe_cached_tile entries[NUM_ENTRIES]; +   uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; +   float clear_color[4]; +   uint clear_val; +   boolean depth_stencil; /** Is the surface a depth/stencil format? 
*/
+
+   struct pipe_surface *tex_surf;
+   void *tex_surf_map;
+   int tex_face, tex_level, tex_z;
+
+   struct softpipe_cached_tile tile;  /**< scratch tile for clears */
+};
+
+
+/**
+ * Return the position in the cache for the tile that contains win pos (x,y).
+ * We currently use a direct mapped cache so this is like a hash key.
+ * At some point we should investigate something more sophisticated, like
+ * an LRU replacement policy.
+ */
+#define CACHE_POS(x, y) \
+   (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES)
+
+
+
+/**
+ * Is the tile at (x,y) in cleared state?
+ */
+static INLINE uint
+is_clear_flag_set(const uint *bitvec, int x, int y)
+{
+   int pos, bit;
+   x /= TILE_SIZE;
+   y /= TILE_SIZE;
+   pos = y * (MAX_WIDTH / TILE_SIZE) + x;
+   assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32);
+   bit = bitvec[pos / 32] & (1 << (pos & 31));
+   return bit;
+}
+
+
+/**
+ * Mark the tile at (x,y) as not cleared.
+ */
+static INLINE void
+clear_clear_flag(uint *bitvec, int x, int y)
+{
+   int pos;
+   x /= TILE_SIZE;
+   y /= TILE_SIZE;
+   pos = y * (MAX_WIDTH / TILE_SIZE) + x;
+   assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32);
+   bitvec[pos / 32] &= ~(1 << (pos & 31));
+}
+
+
+struct softpipe_tile_cache *
+sp_create_tile_cache( struct pipe_screen *screen )
+{
+   struct softpipe_tile_cache *tc;
+   uint pos;
+
+   tc = CALLOC_STRUCT( softpipe_tile_cache );
+   if (tc) {
+      tc->screen = screen;
+      for (pos = 0; pos < NUM_ENTRIES; pos++) {
+         tc->entries[pos].x =
+         tc->entries[pos].y = -1;
+      }
+   }
+   return tc;
+}
+
+
+void
+sp_destroy_tile_cache(struct softpipe_tile_cache *tc)
+{
+   uint pos;
+
+   for (pos = 0; pos < NUM_ENTRIES; pos++) {
+      /*assert(tc->entries[pos].x < 0);*/
+   }
+   if (tc->surface) {
+      pipe_surface_reference(&tc->surface, NULL);
+   }
+   if (tc->tex_surf) {
+      pipe_surface_reference(&tc->tex_surf, NULL);
+   }
+
+   FREE( tc );
+}
+
+
+/**
+ * Specify the surface to cache.
+ */
+void
+sp_tile_cache_set_surface(struct softpipe_tile_cache *tc,
+                          struct pipe_surface *ps)
+{
+   assert(!tc->texture);
+
+   if (tc->surface_map) {
+      tc->screen->surface_unmap(tc->screen, tc->surface);
+      tc->surface_map = NULL;
+   }
+
+   pipe_surface_reference(&tc->surface, ps);
+
+   if (tc->surface) {
+      if (tc->surface_map) /* XXX: this is always NULL!? */
+	 tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface,
+                                                   PIPE_BUFFER_USAGE_CPU_READ |
+                                                   PIPE_BUFFER_USAGE_CPU_WRITE);
+
+      tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM ||
+                           ps->format == PIPE_FORMAT_X8Z24_UNORM ||
+                           ps->format == PIPE_FORMAT_Z24S8_UNORM ||
+                           ps->format == PIPE_FORMAT_Z24X8_UNORM ||
+                           ps->format == PIPE_FORMAT_Z16_UNORM ||
+                           ps->format == PIPE_FORMAT_Z32_UNORM ||
+                           ps->format == PIPE_FORMAT_S8_UNORM);
+   }
+}
+
+
+/**
+ * Return the surface being cached.
+ */ +struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc) +{ +   return tc->surface; +} + + +void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) +{ +   if (tc->surface && !tc->surface_map) +      tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, +                                                PIPE_BUFFER_USAGE_CPU_WRITE | +                                                PIPE_BUFFER_USAGE_CPU_READ); + +   if (tc->tex_surf && !tc->tex_surf_map) +      tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf, +                                                 PIPE_BUFFER_USAGE_CPU_READ); +} + + +void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) +{ +   if (tc->surface_map) { +      tc->screen->surface_unmap(tc->screen, tc->surface); +      tc->surface_map = NULL; +   } + +   if (tc->tex_surf_map) { +      tc->screen->surface_unmap(tc->screen, tc->tex_surf); +      tc->tex_surf_map = NULL; +   } +} + + +/** + * Specify the texture to cache. + */ +void +sp_tile_cache_set_texture(struct pipe_context *pipe, +                          struct softpipe_tile_cache *tc, +                          struct pipe_texture *texture) +{ +   uint i; + +   assert(!tc->surface); + +   pipe_texture_reference(&tc->texture, texture); + +   if (tc->tex_surf_map) { +      tc->screen->surface_unmap(tc->screen, tc->tex_surf); +      tc->tex_surf_map = NULL; +   } +   pipe_surface_reference(&tc->tex_surf, NULL); + +   /* mark as entries as invalid/empty */ +   /* XXX we should try to avoid this when the teximage hasn't changed */ +   for (i = 0; i < NUM_ENTRIES; i++) { +      tc->entries[i].x = -1; +   } + +   tc->tex_face = -1; /* any invalid value here */ +} + + +/** + * Set pixels in a tile to the given clear color/value, float. + */ +static void +clear_tile_rgba(struct softpipe_cached_tile *tile, +                enum pipe_format format, +                const float clear_value[4]) +{ +   if (clear_value[0] == 0.0 && +       clear_value[1] == 0.0 && +       clear_value[2] == 0.0 && +       clear_value[3] == 0.0) { +      memset(tile->data.color, 0, sizeof(tile->data.color)); +   } +   else { +      uint i, j; +      for (i = 0; i < TILE_SIZE; i++) { +         for (j = 0; j < TILE_SIZE; j++) { +            tile->data.color[i][j][0] = clear_value[0]; +            tile->data.color[i][j][1] = clear_value[1]; +            tile->data.color[i][j][2] = clear_value[2]; +            tile->data.color[i][j][3] = clear_value[3]; +         } +      } +   } +} + + +/** + * Set a tile to a solid value/color. 
+ */ +static void +clear_tile(struct softpipe_cached_tile *tile, +           enum pipe_format format, +           uint clear_value) +{ +   uint i, j; + +   switch (pf_get_size(format)) { +   case 1: +      memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE); +      break; +   case 2: +      if (clear_value == 0) { +         memset(tile->data.any, 0, 2 * TILE_SIZE * TILE_SIZE); +      } +      else { +         for (i = 0; i < TILE_SIZE; i++) { +            for (j = 0; j < TILE_SIZE; j++) { +               tile->data.depth16[i][j] = (ushort) clear_value; +            } +         } +      } +      break; +   case 4: +      if (clear_value == 0) { +         memset(tile->data.any, 0, 4 * TILE_SIZE * TILE_SIZE); +      } +      else { +         for (i = 0; i < TILE_SIZE; i++) { +            for (j = 0; j < TILE_SIZE; j++) { +               tile->data.color32[i][j] = clear_value; +            } +         } +      } +      break; +   default: +      assert(0); +   } +} + + +/** + * Actually clear the tiles which were flagged as being in a clear state. + */ +static void +sp_tile_cache_flush_clear(struct pipe_context *pipe, +                          struct softpipe_tile_cache *tc) +{ +   struct pipe_surface *ps = tc->surface; +   const uint w = tc->surface->width; +   const uint h = tc->surface->height; +   uint x, y; +   uint numCleared = 0; + +   /* clear the scratch tile to the clear value */ +   clear_tile(&tc->tile, ps->format, tc->clear_val); + +   /* push the tile to all positions marked as clear */ +   for (y = 0; y < h; y += TILE_SIZE) { +      for (x = 0; x < w; x += TILE_SIZE) { +         if (is_clear_flag_set(tc->clear_flags, x, y)) { +            pipe_put_tile_raw(ps, +                              x, y, TILE_SIZE, TILE_SIZE, +                              tc->tile.data.color32, 0/*STRIDE*/); + +            /* do this? */ +            clear_clear_flag(tc->clear_flags, x, y); + +            numCleared++; +         } +      } +   } +#if 0 +   debug_printf("num cleared: %u\n", numCleared); +#endif +} + + +/** + * Flush the tile cache: write all dirty tiles back to the surface. + * any tiles "flagged" as cleared will be "really" cleared. + */ +void +sp_flush_tile_cache(struct softpipe_context *softpipe, +                    struct softpipe_tile_cache *tc) +{ +   struct pipe_surface *ps = tc->surface; +   int inuse = 0, pos; + +   if (ps) { +      /* caching a drawing surface */ +      for (pos = 0; pos < NUM_ENTRIES; pos++) { +         struct softpipe_cached_tile *tile = tc->entries + pos; +         if (tile->x >= 0) { +            if (tc->depth_stencil) { +               pipe_put_tile_raw(ps, +                                 tile->x, tile->y, TILE_SIZE, TILE_SIZE, +                                 tile->data.depth32, 0/*STRIDE*/); +            } +            else { +               pipe_put_tile_rgba(ps, +                                  tile->x, tile->y, TILE_SIZE, TILE_SIZE, +                                  (float *) tile->data.color); +            } +            tile->x = tile->y = -1;  /* mark as empty */ +            inuse++; +         } +      } + +#if TILE_CLEAR_OPTIMIZATION +      sp_tile_cache_flush_clear(&softpipe->pipe, tc); +#endif +   } +   else if (tc->texture) { +      /* caching a texture, mark all entries as empty */ +      for (pos = 0; pos < NUM_ENTRIES; pos++) { +         tc->entries[pos].x = -1; +      } +      tc->tex_face = -1; +   } + +#if 0 +   debug_printf("flushed tiles in use: %d\n", inuse); +#endif +} + + +/** + * Get a tile from the cache. 
+ * \param x, y  position of tile, in pixels + */ +struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, +                   struct softpipe_tile_cache *tc, int x, int y) +{ +   struct pipe_surface *ps = tc->surface; + +   /* tile pos in framebuffer: */ +   const int tile_x = x & ~(TILE_SIZE - 1); +   const int tile_y = y & ~(TILE_SIZE - 1); + +   /* cache pos/entry: */ +   const int pos = CACHE_POS(x, y); +   struct softpipe_cached_tile *tile = tc->entries + pos; + +   if (tile_x != tile->x || +       tile_y != tile->y) { + +      if (tile->x != -1) { +         /* put dirty tile back in framebuffer */ +         if (tc->depth_stencil) { +            pipe_put_tile_raw(ps, +                              tile->x, tile->y, TILE_SIZE, TILE_SIZE, +                              tile->data.depth32, 0/*STRIDE*/); +         } +         else { +            pipe_put_tile_rgba(ps, +                               tile->x, tile->y, TILE_SIZE, TILE_SIZE, +                               (float *) tile->data.color); +         } +      } + +      tile->x = tile_x; +      tile->y = tile_y; + +      if (is_clear_flag_set(tc->clear_flags, x, y)) { +         /* don't get tile from framebuffer, just clear it */ +         if (tc->depth_stencil) { +            clear_tile(tile, ps->format, tc->clear_val); +         } +         else { +            clear_tile_rgba(tile, ps->format, tc->clear_color); +         } +         clear_clear_flag(tc->clear_flags, x, y); +      } +      else { +         /* get new tile data from surface */ +         if (tc->depth_stencil) { +            pipe_get_tile_raw(ps, +                              tile->x, tile->y, TILE_SIZE, TILE_SIZE, +                              tile->data.depth32, 0/*STRIDE*/); +         } +         else { +            pipe_get_tile_rgba(ps, +                               tile->x, tile->y, TILE_SIZE, TILE_SIZE, +                               (float *) tile->data.color); +         } +      } +   } + +   return tile; +} + + +/** + * Given the texture face, level, zslice, x and y values, compute + * the cache entry position/index where we'd hope to find the + * cached texture tile. + * This is basically a direct-map cache. + * XXX There's probably lots of ways in which we can improve this. + */ +static INLINE uint +tex_cache_pos(int x, int y, int z, int face, int level) +{ +   uint entry = x + y * 5 + z * 4 + face + level; +   return entry % NUM_ENTRIES; +} + + +/** + * Similar to sp_get_cached_tile() but for textures. + * Tiles are read-only and indexed with more params. 
+ */ +const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct softpipe_context *sp, +                       struct softpipe_tile_cache *tc, int x, int y, int z, +                       int face, int level) +{ +   struct pipe_screen *screen = sp->pipe.screen; +   /* tile pos in framebuffer: */ +   const int tile_x = x & ~(TILE_SIZE - 1); +   const int tile_y = y & ~(TILE_SIZE - 1); +   /* cache pos/entry: */ +   const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z, +                                  face, level); +   struct softpipe_cached_tile *tile = tc->entries + pos; + +   if (tc->texture) { +      struct softpipe_texture *spt = softpipe_texture(tc->texture); +      if (spt->modified) { +         /* texture was modified, force a cache reload */ +         tile->x = -1; +         spt->modified = FALSE; +      } +   } + +   if (tile_x != tile->x || +       tile_y != tile->y || +       z != tile->z || +       face != tile->face || +       level != tile->level) { +      /* cache miss */ + +      /* check if we need to get a new surface */ +      if (!tc->tex_surf || +          tc->tex_face != face || +          tc->tex_level != level || +          tc->tex_z != z) { +         /* get new surface (view into texture) */ + +	 if (tc->tex_surf_map) +            tc->screen->surface_unmap(tc->screen, tc->tex_surf); + +         tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z,  +                                                PIPE_BUFFER_USAGE_CPU_READ); +         tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf, +                                                PIPE_BUFFER_USAGE_CPU_READ); + +         tc->tex_face = face; +         tc->tex_level = level; +         tc->tex_z = z; +      } + +      /* get tile from the surface (view into texture) */ +      pipe_get_tile_rgba(tc->tex_surf, +                         tile_x, tile_y, TILE_SIZE, TILE_SIZE, +                         (float *) tile->data.color); +      tile->x = tile_x; +      tile->y = tile_y; +      tile->z = z; +      tile->face = face; +      tile->level = level; +   } + +   return tile; +} + + +/** + * When a whole surface is being cleared to a value we can avoid + * fetching tiles above. + * Save the color and set a 'clearflag' for each tile of the screen. 
+ */ +void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue) +{ +   uint r, g, b, a; +   uint pos; + +   tc->clear_val = clearValue; + +   switch (tc->surface->format) { +   case PIPE_FORMAT_R8G8B8A8_UNORM: +      r = (clearValue >> 24) & 0xff; +      g = (clearValue >> 16) & 0xff; +      b = (clearValue >>  8) & 0xff; +      a = (clearValue      ) & 0xff; +      break; +   case PIPE_FORMAT_A8R8G8B8_UNORM: +      r = (clearValue >> 16) & 0xff; +      g = (clearValue >>  8) & 0xff; +      b = (clearValue      ) & 0xff; +      a = (clearValue >> 24) & 0xff; +      break; +   case PIPE_FORMAT_B8G8R8A8_UNORM: +      r = (clearValue >>  8) & 0xff; +      g = (clearValue >> 16) & 0xff; +      b = (clearValue >> 24) & 0xff; +      a = (clearValue      ) & 0xff; +      break; +   default: +      r = g = b = a = 0; +   } + +   tc->clear_color[0] = r / 255.0f; +   tc->clear_color[1] = g / 255.0f; +   tc->clear_color[2] = b / 255.0f; +   tc->clear_color[3] = a / 255.0f; + +#if TILE_CLEAR_OPTIMIZATION +   /* set flags to indicate all the tiles are cleared */ +   memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); +#else +   /* disable the optimization */ +   memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); +#endif + +   for (pos = 0; pos < NUM_ENTRIES; pos++) { +      struct softpipe_cached_tile *tile = tc->entries + pos; +      tile->x = tile->y = -1; +   } +} diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h new file mode 100644 index 0000000000..a66bb50bcc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -0,0 +1,105 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +#ifndef SP_TILE_CACHE_H +#define SP_TILE_CACHE_H + +#define TILE_CLEAR_OPTIMIZATION 1 + + +#include "pipe/p_compiler.h" + + +struct softpipe_context; +struct softpipe_tile_cache; + + +/** + * Cache tile size (width and height). This needs to be a power of two. 
+ */ +#define TILE_SIZE 64 + + + +struct softpipe_cached_tile +{ +   int x, y;           /**< pos of tile in window coords */ +   int z, face, level; /**< Extra texture indexes */ +   union { +      float color[TILE_SIZE][TILE_SIZE][4]; +      uint color32[TILE_SIZE][TILE_SIZE]; +      uint depth32[TILE_SIZE][TILE_SIZE]; +      ushort depth16[TILE_SIZE][TILE_SIZE]; +      ubyte stencil8[TILE_SIZE][TILE_SIZE]; +      ubyte any[1]; +   } data; +}; + + +extern struct softpipe_tile_cache * +sp_create_tile_cache( struct pipe_screen *screen ); + +extern void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, +                          struct pipe_surface *sps); + +extern struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_texture(struct pipe_context *pipe, +                          struct softpipe_tile_cache *tc, +                          struct pipe_texture *texture); + +extern void +sp_flush_tile_cache(struct softpipe_context *softpipe, +                    struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue); + +extern struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, +                   struct softpipe_tile_cache *tc, int x, int y); + +extern const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct softpipe_context *softpipe, +                       struct softpipe_tile_cache *tc, int x, int y, int z, +                       int face, int level); + + +#endif /* SP_TILE_CACHE_H */ + diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h new file mode 100644 index 0000000000..4ab666486c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -0,0 +1,73 @@ +/************************************************************************** + *  + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + *  + **************************************************************************/ + +/* This is the interface that softpipe requires any window system + * hosting it to implement.  
This is the only include file in softpipe
+ * which is public.
+ */
+
+
+#ifndef SP_WINSYS_H
+#define SP_WINSYS_H
+
+
+#include "pipe/p_compiler.h" /* for boolean */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+enum pipe_format;
+
+struct softpipe_winsys {
+   /** test if the given format is supported for front/back color bufs */
+   boolean (*is_format_supported)( struct softpipe_winsys *sws,
+                                   enum pipe_format format );
+
+};
+
+struct pipe_screen;
+struct pipe_winsys;
+struct pipe_context;
+
+
+struct pipe_context *softpipe_create( struct pipe_screen *,
+                                      struct pipe_winsys *,
+				      void *unused );
+
+
+struct pipe_screen *
+softpipe_create_screen(struct pipe_winsys *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SP_WINSYS_H */
diff --git a/src/gallium/drivers/trace/Makefile b/src/gallium/drivers/trace/Makefile
new file mode 100644
index 0000000000..3859b8acb0
--- /dev/null
+++ b/src/gallium/drivers/trace/Makefile
@@ -0,0 +1,18 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = trace
+
+C_SOURCES = \
+	tr_context.c \
+	tr_dump.c \
+	tr_screen.c \
+	tr_state.c \
+	tr_texture.c \
+	tr_winsys.c
+
+
+include ../../Makefile.template
+
+symlinks:
+
diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README
new file mode 100644
index 0000000000..f0e1cd596d
--- /dev/null
+++ b/src/gallium/drivers/trace/README
@@ -0,0 +1,64 @@
+                             TRACE PIPE DRIVER
+
+
+= About =
+
+This directory contains a Gallium3D pipe driver which traces all incoming calls.
+
+
+= Build Instructions =
+
+To build, invoke scons in the top directory as
+
+ scons statetrackers=mesa drivers=softpipe,i965simple,trace winsys=xlib
+
+
+= Usage =
+
+To use it, do
+
+ ln -s libGL.so build/linux-x86-debug/gallium/winsys/xlib/libGL.so.1
+ export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/gallium/winsys/xlib
+
+ensure that the right libGL.so is being picked up by doing
+
+ ldd progs/trivial/tri
+
+and then try running
+
+ GALLIUM_TRACE=tri.trace progs/trivial/tri
+
+which should create a tri.trace file, which is an XML file. You can view it by
+copying trace.xsl to the same directory and opening the trace with an
+XSLT-capable browser such as Firefox or Internet Explorer.
+
+
+= Integrating =
+
+You can integrate the trace pipe driver either inside the state tracker or the
+winsys. The procedure in both cases is the same. Let's assume you have a
+pipe_screen and a pipe_context pair obtained by the usual means (variable and
+function names are just for illustration purposes):
+
+  real_screen = real_screen_create(...);
+
+  real_context = real_context_create(...);
+
+The trace screen and trace context are then created by doing
+
+  trace_screen = trace_screen_create(real_screen);
+
+  trace_context = trace_context_create(trace_screen, real_context);
+
+You can then simply use trace_screen and trace_context instead of real_screen
+and real_context.
+
+Do not call trace_winsys_create. Simply pass trace_screen->winsys or
+trace_context->winsys in places where you would pass the winsys.
+
+You can create as many contexts as you wish. Just ensure that you don't mistake
+trace_screen for real_screen when creating them.
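+
+For illustration, here is a minimal C sketch of that wiring, assuming softpipe
+is the real driver underneath, that my_winsys_create() is a hypothetical
+placeholder for whatever winsys object your window system actually provides,
+and that trace_screen_create()/trace_context_create() return the wrapping
+pipe_screen and pipe_context as the snippets above suggest:
+
+  /* minimal sketch, assuming softpipe as the real driver underneath */
+  struct pipe_winsys *winsys = my_winsys_create();        /* hypothetical */
+  struct pipe_screen *real_screen = softpipe_create_screen(winsys);
+  struct pipe_context *real_context = softpipe_create(real_screen, winsys, NULL);
+
+  /* wrap both objects with the trace driver */
+  struct pipe_screen *trace_screen = trace_screen_create(real_screen);
+  struct pipe_context *trace_context = trace_context_create(trace_screen,
+                                                             real_context);
+
+  /* from here on, pass trace_screen and trace_context everywhere */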
+ + +-- +Jose Fonseca <jrfonseca@tungstengraphics.com> diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript new file mode 100644 index 0000000000..0a6bfb8f4c --- /dev/null +++ b/src/gallium/drivers/trace/SConscript @@ -0,0 +1,16 @@ +Import('*') + +env = env.Clone() + +trace = env.ConvenienceLibrary( +    target = 'trace', +    source = [ +        'tr_context.c', +        'tr_dump.c', +        'tr_screen.c', +        'tr_state.c', +        'tr_texture.c', +        'tr_winsys.c', +    ]) + +Export('trace')
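
Every wrapper in tr_context.c below follows the same shape: recover the trace_context from the pipe_context it was handed, dump the call and its arguments, forward to the wrapped driver's context, and close the call record. A condensed sketch of that shape (some_call is a stand-in name, not a real pipe_context method; the comments note the XML each step writes into the trace):

   static INLINE void
   trace_context_some_call(struct pipe_context *_pipe, unsigned arg)
   {
      struct trace_context *tr_ctx = trace_context(_pipe);  /* unwrap */
      struct pipe_context *pipe = tr_ctx->pipe;             /* real driver */

      trace_dump_call_begin("pipe_context", "some_call");   /* <call class='pipe_context' method='some_call'> */
      trace_dump_arg(ptr, pipe);                             /* <arg name='pipe'><ptr>...</ptr></arg> */
      trace_dump_arg(uint, arg);                             /* <arg name='arg'><uint>...</uint></arg> */

      pipe->some_call(pipe, arg);                            /* forward to the wrapped context */

      trace_dump_call_end();                                 /* </call> */
   }

Calls that return a value additionally wrap the result with trace_dump_ret() before closing the record, as the real wrappers below do.
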
\ No newline at end of file diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c new file mode 100644 index 0000000000..ec8be27077 --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.c @@ -0,0 +1,1072 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_screen.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" +#include "tr_winsys.h" +#include "tr_context.h" + + +static INLINE struct pipe_texture *  +trace_texture_unwrap(struct trace_context *tr_ctx, +                     struct pipe_texture *texture) +{ +   struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen);  +   struct trace_texture *tr_tex; +    +   if(!texture) +      return NULL; +    +   tr_tex = trace_texture(tr_scr, texture); +    +   assert(tr_tex->texture); +   assert(tr_tex->texture->screen == tr_scr->screen); +   return tr_tex->texture; +} + + +static INLINE struct pipe_surface *  +trace_surface_unwrap(struct trace_context *tr_ctx, +                     struct pipe_surface *surface) +{ +   struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen);  +   struct trace_texture *tr_tex; +   struct trace_surface *tr_surf; +    +   if(!surface) +      return NULL; + +   assert(surface->texture); +   if(!surface->texture) +      return surface; +    +   tr_tex = trace_texture(tr_scr, surface->texture); +   tr_surf = trace_surface(tr_tex, surface); +    +   assert(tr_surf->surface); +   assert(tr_surf->surface->texture->screen == tr_scr->screen); +   return tr_surf->surface; +} + + +static INLINE void +trace_context_set_edgeflags(struct pipe_context *_pipe, +                            const unsigned *bitfield) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "set_edgeflags"); +    +   trace_dump_arg(ptr, pipe); +   /* FIXME: we don't know how big this array is */ +   trace_dump_arg(ptr, bitfield); + +   pipe->set_edgeflags(pipe, bitfield);; + +   trace_dump_call_end(); +} + + +static INLINE boolean +trace_context_draw_arrays(struct pipe_context *_pipe, +            
              unsigned mode, unsigned start, unsigned count) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   boolean result; + +   trace_dump_call_begin("pipe_context", "draw_arrays"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, mode); +   trace_dump_arg(uint, start); +   trace_dump_arg(uint, count); + +   result = pipe->draw_arrays(pipe, mode, start, count);; + +   trace_dump_ret(bool, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE boolean +trace_context_draw_elements(struct pipe_context *_pipe, +                          struct pipe_buffer *indexBuffer, +                          unsigned indexSize, +                          unsigned mode, unsigned start, unsigned count) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   boolean result; + +   trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + +   trace_dump_call_begin("pipe_context", "draw_elements"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, indexBuffer); +   trace_dump_arg(uint, indexSize); +   trace_dump_arg(uint, mode); +   trace_dump_arg(uint, start); +   trace_dump_arg(uint, count); + +   result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; + +   trace_dump_ret(bool, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE boolean +trace_context_draw_range_elements(struct pipe_context *_pipe, +                                  struct pipe_buffer *indexBuffer, +                                  unsigned indexSize, +                                  unsigned minIndex, +                                  unsigned maxIndex, +                                  unsigned mode,  +                                  unsigned start,  +                                  unsigned count) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   boolean result; + +   trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + +   trace_dump_call_begin("pipe_context", "draw_range_elements"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, indexBuffer); +   trace_dump_arg(uint, indexSize); +   trace_dump_arg(uint, minIndex); +   trace_dump_arg(uint, maxIndex); +   trace_dump_arg(uint, mode); +   trace_dump_arg(uint, start); +   trace_dump_arg(uint, count); + +   result = pipe->draw_range_elements(pipe,  +                                      indexBuffer,  +                                      indexSize, minIndex, maxIndex,  +                                      mode, start, count); +    +   trace_dump_ret(bool, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE struct pipe_query * +trace_context_create_query(struct pipe_context *_pipe, +                           unsigned query_type) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   struct pipe_query *result; + +   trace_dump_call_begin("pipe_context", "create_query"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, query_type); + +   result = pipe->create_query(pipe, query_type);; + +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE void +trace_context_destroy_query(struct pipe_context *_pipe, +                            struct pipe_query *query) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct 
pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "destroy_query"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, query); + +   pipe->destroy_query(pipe, query);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_begin_query(struct pipe_context *_pipe,  +                          struct pipe_query *query) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "begin_query"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, query); + +   pipe->begin_query(pipe, query);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_end_query(struct pipe_context *_pipe,  +                        struct pipe_query *query) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "end_query"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, query); + +   pipe->end_query(pipe, query); + +   trace_dump_call_end(); +} + + +static INLINE boolean +trace_context_get_query_result(struct pipe_context *_pipe,  +                               struct pipe_query *query, +                               boolean wait, +                               uint64_t *presult) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   uint64_t result; +   boolean _result; + +   trace_dump_call_begin("pipe_context", "get_query_result"); + +   trace_dump_arg(ptr, pipe); + +   _result = pipe->get_query_result(pipe, query, wait, presult);; +   result = *presult; + +   trace_dump_arg(uint, result); +   trace_dump_ret(bool, _result); +    +   trace_dump_call_end(); +    +   return _result; +} + + +static INLINE void * +trace_context_create_blend_state(struct pipe_context *_pipe, +                                 const struct pipe_blend_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   void * result; + +   trace_dump_call_begin("pipe_context", "create_blend_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(blend_state, state); + +   result = pipe->create_blend_state(pipe, state);; + +   trace_dump_ret(ptr, result); + +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE void +trace_context_bind_blend_state(struct pipe_context *_pipe,  +                               void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "bind_blend_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->bind_blend_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_blend_state(struct pipe_context *_pipe,  +                                 void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "delete_blend_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->delete_blend_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_sampler_state(struct pipe_context *_pipe, +                                   const struct pipe_sampler_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   void * 
result; + +   trace_dump_call_begin("pipe_context", "create_sampler_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(sampler_state, state); + +   result = pipe->create_sampler_state(pipe, state);; + +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE void +trace_context_bind_sampler_states(struct pipe_context *_pipe,  +                                  unsigned num_states, void **states) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "bind_sampler_states"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, num_states); +   trace_dump_arg_array(ptr, states, num_states); + +   pipe->bind_sampler_states(pipe, num_states, states);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_sampler_state(struct pipe_context *_pipe,  +                                   void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "delete_sampler_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->delete_sampler_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_rasterizer_state(struct pipe_context *_pipe, +                                      const struct pipe_rasterizer_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   void * result; + +   trace_dump_call_begin("pipe_context", "create_rasterizer_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(rasterizer_state, state); + +   result = pipe->create_rasterizer_state(pipe, state);; + +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE void +trace_context_bind_rasterizer_state(struct pipe_context *_pipe,  +                                    void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "bind_rasterizer_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->bind_rasterizer_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_rasterizer_state(struct pipe_context *_pipe,  +                                      void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "delete_rasterizer_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->delete_rasterizer_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, +                                               const struct pipe_depth_stencil_alpha_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   void * result; + +   trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state"); + +   result = pipe->create_depth_stencil_alpha_state(pipe, state);; + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(depth_stencil_alpha_state, state); +    +   trace_dump_ret(ptr, result); + +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE void 
+trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,  +                                             void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "bind_depth_stencil_alpha_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->bind_depth_stencil_alpha_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,  +                                               void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "delete_depth_stencil_alpha_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->delete_depth_stencil_alpha_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_fs_state(struct pipe_context *_pipe, +                              const struct pipe_shader_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   void * result; + +   trace_dump_call_begin("pipe_context", "create_fs_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(shader_state, state); + +   result = pipe->create_fs_state(pipe, state);; + +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE void +trace_context_bind_fs_state(struct pipe_context *_pipe,  +                            void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "bind_fs_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->bind_fs_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_fs_state(struct pipe_context *_pipe,  +                              void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "delete_fs_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->delete_fs_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_vs_state(struct pipe_context *_pipe, +                              const struct pipe_shader_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   void * result; + +   trace_dump_call_begin("pipe_context", "create_vs_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(shader_state, state); + +   result = pipe->create_vs_state(pipe, state);; + +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static INLINE void +trace_context_bind_vs_state(struct pipe_context *_pipe,  +                            void *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "bind_vs_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->bind_vs_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_vs_state(struct pipe_context *_pipe,  +                              void *state) +{ +   struct 
trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "delete_vs_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, state); + +   pipe->delete_vs_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_blend_color(struct pipe_context *_pipe, +                              const struct pipe_blend_color *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "set_blend_color"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(blend_color, state); + +   pipe->set_blend_color(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_clip_state(struct pipe_context *_pipe, +                             const struct pipe_clip_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "set_clip_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(clip_state, state); + +   pipe->set_clip_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_constant_buffer(struct pipe_context *_pipe, +                                  uint shader, uint index, +                                  const struct pipe_constant_buffer *buffer) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_winsys_user_buffer_update(_pipe->winsys, (struct pipe_buffer *)buffer); +    +   trace_dump_call_begin("pipe_context", "set_constant_buffer"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, shader); +   trace_dump_arg(uint, index); +   trace_dump_arg(constant_buffer, buffer); + +   pipe->set_constant_buffer(pipe, shader, index, buffer);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_framebuffer_state(struct pipe_context *_pipe, +                                    const struct pipe_framebuffer_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   struct pipe_framebuffer_state unwrapped_state; +   unsigned i; +    +   /* Unwrap the input state */ +   memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); +   for(i = 0; i < state->nr_cbufs; ++i) +      unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); +   for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) +      unwrapped_state.cbufs[i] = NULL; +   unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); +   state = &unwrapped_state; +    +   trace_dump_call_begin("pipe_context", "set_framebuffer_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(framebuffer_state, state); + +   pipe->set_framebuffer_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_polygon_stipple(struct pipe_context *_pipe, +                                  const struct pipe_poly_stipple *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "set_polygon_stipple"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(poly_stipple, state); + +   pipe->set_polygon_stipple(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_scissor_state(struct pipe_context *_pipe, +                          
      const struct pipe_scissor_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "set_scissor_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(scissor_state, state); + +   pipe->set_scissor_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_viewport_state(struct pipe_context *_pipe, +                                 const struct pipe_viewport_state *state) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "set_viewport_state"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(viewport_state, state); + +   pipe->set_viewport_state(pipe, state);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_sampler_textures(struct pipe_context *_pipe, +                                   unsigned num_textures, +                                   struct pipe_texture **textures) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   struct pipe_texture *unwrapped_textures[PIPE_MAX_SAMPLERS]; +   unsigned i; +    +   for(i = 0; i < num_textures; ++i) +      unwrapped_textures[i] = trace_texture_unwrap(tr_ctx, textures[i]); +   textures = unwrapped_textures; + +   trace_dump_call_begin("pipe_context", "set_sampler_textures"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, num_textures); +   trace_dump_arg_array(ptr, textures, num_textures); + +   pipe->set_sampler_textures(pipe, num_textures, textures);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_vertex_buffers(struct pipe_context *_pipe, +                                 unsigned num_buffers, +                                 const struct pipe_vertex_buffer *buffers) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; +   unsigned i; + +   for(i = 0; i < num_buffers; ++i) +      trace_winsys_user_buffer_update(_pipe->winsys, buffers[i].buffer); + +   trace_dump_call_begin("pipe_context", "set_vertex_buffers"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, num_buffers); +    +   trace_dump_arg_begin("buffers"); +   trace_dump_struct_array(vertex_buffer, buffers, num_buffers); +   trace_dump_arg_end(); + +   pipe->set_vertex_buffers(pipe, num_buffers, buffers);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_vertex_elements(struct pipe_context *_pipe, +                                  unsigned num_elements, +                                  const struct pipe_vertex_element *elements) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "set_vertex_elements"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, num_elements); + +   trace_dump_arg_begin("elements"); +   trace_dump_struct_array(vertex_element, elements, num_elements); +   trace_dump_arg_end(); + +   pipe->set_vertex_elements(pipe, num_elements, elements);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_surface_copy(struct pipe_context *_pipe, +                           boolean do_flip, +                           struct pipe_surface *dest, +                           unsigned destx, unsigned desty, +                           struct pipe_surface *src, +                           
unsigned srcx, unsigned srcy, +                           unsigned width, unsigned height) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   dest = trace_surface_unwrap(tr_ctx, dest); +   src = trace_surface_unwrap(tr_ctx, src); +    +   trace_dump_call_begin("pipe_context", "surface_copy"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(bool, do_flip); +   trace_dump_arg(ptr, dest); +   trace_dump_arg(uint, destx); +   trace_dump_arg(uint, desty); +   trace_dump_arg(ptr, src); +   trace_dump_arg(uint, srcx); +   trace_dump_arg(uint, srcy); +   trace_dump_arg(uint, width); +   trace_dump_arg(uint, height); + +   pipe->surface_copy(pipe, do_flip,  +                      dest, destx, desty,  +                      src, srcx, srcy, width, height); +    +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_surface_fill(struct pipe_context *_pipe, +                           struct pipe_surface *dst, +                           unsigned dstx, unsigned dsty, +                           unsigned width, unsigned height, +                           unsigned value) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   dst = trace_surface_unwrap(tr_ctx, dst); + +   trace_dump_call_begin("pipe_context", "surface_fill"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, dst); +   trace_dump_arg(uint, dstx); +   trace_dump_arg(uint, dsty); +   trace_dump_arg(uint, width); +   trace_dump_arg(uint, height); + +   pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_clear(struct pipe_context *_pipe,  +                    struct pipe_surface *surface, +                    unsigned clearValue) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   surface = trace_surface_unwrap(tr_ctx, surface); + +   trace_dump_call_begin("pipe_context", "clear"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(ptr, surface); +   trace_dump_arg(uint, clearValue); + +   pipe->clear(pipe, surface, clearValue);; + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_flush(struct pipe_context *_pipe, +                    unsigned flags, +                    struct pipe_fence_handle **fence) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "flush"); + +   trace_dump_arg(ptr, pipe); +   trace_dump_arg(uint, flags); + +   pipe->flush(pipe, flags, fence);; + +   if(fence) +      trace_dump_ret(ptr, *fence); + +   trace_dump_call_end(); +} + + +static INLINE void +trace_context_destroy(struct pipe_context *_pipe) +{ +   struct trace_context *tr_ctx = trace_context(_pipe); +   struct pipe_context *pipe = tr_ctx->pipe; + +   trace_dump_call_begin("pipe_context", "destroy"); + +   trace_dump_arg(ptr, pipe); + +   pipe->destroy(pipe); +    +   trace_dump_call_end(); + +   FREE(tr_ctx); +} + + +struct pipe_context * +trace_context_create(struct pipe_screen *screen,  +                     struct pipe_context *pipe) +{ +   struct trace_context *tr_ctx; +    +   if(!pipe) +      goto error1; +    +   if(!trace_dump_enabled()) +      goto error1; +    +   tr_ctx = CALLOC_STRUCT(trace_context); +   if(!tr_ctx) +      goto error1; + +   tr_ctx->base.winsys = screen->winsys; +   tr_ctx->base.screen = screen; +   tr_ctx->base.destroy = 
trace_context_destroy; +   tr_ctx->base.set_edgeflags = trace_context_set_edgeflags; +   tr_ctx->base.draw_arrays = trace_context_draw_arrays; +   tr_ctx->base.draw_elements = trace_context_draw_elements; +   tr_ctx->base.draw_range_elements = trace_context_draw_range_elements; +   tr_ctx->base.create_query = trace_context_create_query; +   tr_ctx->base.destroy_query = trace_context_destroy_query; +   tr_ctx->base.begin_query = trace_context_begin_query; +   tr_ctx->base.end_query = trace_context_end_query; +   tr_ctx->base.get_query_result = trace_context_get_query_result; +   tr_ctx->base.create_blend_state = trace_context_create_blend_state; +   tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; +   tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; +   tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; +   tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states; +   tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; +   tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; +   tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; +   tr_ctx->base.delete_rasterizer_state = trace_context_delete_rasterizer_state; +   tr_ctx->base.create_depth_stencil_alpha_state = trace_context_create_depth_stencil_alpha_state; +   tr_ctx->base.bind_depth_stencil_alpha_state = trace_context_bind_depth_stencil_alpha_state; +   tr_ctx->base.delete_depth_stencil_alpha_state = trace_context_delete_depth_stencil_alpha_state; +   tr_ctx->base.create_fs_state = trace_context_create_fs_state; +   tr_ctx->base.bind_fs_state = trace_context_bind_fs_state; +   tr_ctx->base.delete_fs_state = trace_context_delete_fs_state; +   tr_ctx->base.create_vs_state = trace_context_create_vs_state; +   tr_ctx->base.bind_vs_state = trace_context_bind_vs_state; +   tr_ctx->base.delete_vs_state = trace_context_delete_vs_state; +   tr_ctx->base.set_blend_color = trace_context_set_blend_color; +   tr_ctx->base.set_clip_state = trace_context_set_clip_state; +   tr_ctx->base.set_constant_buffer = trace_context_set_constant_buffer; +   tr_ctx->base.set_framebuffer_state = trace_context_set_framebuffer_state; +   tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple; +   tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; +   tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; +   tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; +   tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; +   tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; +   tr_ctx->base.surface_copy = trace_context_surface_copy; +   tr_ctx->base.surface_fill = trace_context_surface_fill; +   tr_ctx->base.clear = trace_context_clear; +   tr_ctx->base.flush = trace_context_flush; + +   tr_ctx->pipe = pipe; +    +   trace_dump_call_begin("", "pipe_context_create"); +   trace_dump_arg_begin("screen"); +   trace_dump_ptr(pipe->screen); +   trace_dump_arg_end(); +   trace_dump_ret(ptr, pipe); +   trace_dump_call_end(); + +   return &tr_ctx->base; +    +error1: +   return pipe; +} diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h new file mode 100644 index 0000000000..7831900ec2 --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_CONTEXT_H_ +#define TR_CONTEXT_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_context.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +    +struct trace_context +{ +   struct pipe_context base; +    +   struct pipe_context *pipe; +}; + + +static INLINE struct trace_context * +trace_context(struct pipe_context *pipe) +{ +   assert(pipe); +   return (struct trace_context *)pipe; +} + + + +struct pipe_context * +trace_context_create(struct pipe_screen *screen, +                     struct pipe_context *pipe); + + +#ifdef __cplusplus +} +#endif + +#endif /* TR_CONTEXT_H_ */ diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c new file mode 100644 index 0000000000..a0ead0ded3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.c @@ -0,0 +1,404 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Trace dumping functions. 
+ *  + * For now we just use standard XML for dumping the trace calls, as this is + * simple to write, parse, and visually inspect, but the actual representation  + * is abstracted out of this file, so that we can switch to a binary  + * representation if/when it becomes justified. + *  + * @author Jose Fonseca <jrfonseca@tungstengraphics.com>    + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <stdlib.h> +#endif + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_stream.h" + +#include "tr_dump.h" + + +static struct util_stream *stream = NULL; +static unsigned refcount = 0; + + +static INLINE void  +trace_dump_write(const char *buf, size_t size) +{ +   if(stream) +      util_stream_write(stream, buf, size); +} + + +static INLINE void  +trace_dump_writes(const char *s) +{ +   trace_dump_write(s, strlen(s)); +} + + +static INLINE void  +trace_dump_writef(const char *format, ...) +{ +   static char buf[1024]; +   unsigned len; +   va_list ap; +   va_start(ap, format); +   len = util_vsnprintf(buf, sizeof(buf), format, ap); +   va_end(ap); +   trace_dump_write(buf, len); +} + + +static INLINE void  +trace_dump_escape(const char *str)  +{ +   const unsigned char *p = (const unsigned char *)str; +   unsigned char c; +   while((c = *p++) != 0) { +      if(c == '<') +         trace_dump_writes("<"); +      else if(c == '>') +         trace_dump_writes(">"); +      else if(c == '&') +         trace_dump_writes("&"); +      else if(c == '\'') +         trace_dump_writes("'"); +      else if(c == '\"') +         trace_dump_writes("""); +      else if(c >= 0x20 && c <= 0x7e) +         trace_dump_writef("%c", c); +      else +         trace_dump_writef("&#%u;", c); +   } +} + + +static INLINE void  +trace_dump_indent(unsigned level) +{ +   unsigned i; +   for(i = 0; i < level; ++i) +      trace_dump_writes("\t"); +} + + +static INLINE void  +trace_dump_newline(void)  +{ +   trace_dump_writes("\n"); +} + + +static INLINE void  +trace_dump_tag(const char *name) +{ +   trace_dump_writes("<"); +   trace_dump_writes(name); +   trace_dump_writes("/>"); +} + + +static INLINE void  +trace_dump_tag_begin(const char *name) +{ +   trace_dump_writes("<"); +   trace_dump_writes(name); +   trace_dump_writes(">"); +} + +static INLINE void  +trace_dump_tag_begin1(const char *name,  +                      const char *attr1, const char *value1) +{ +   trace_dump_writes("<"); +   trace_dump_writes(name); +   trace_dump_writes(" "); +   trace_dump_writes(attr1); +   trace_dump_writes("='"); +   trace_dump_escape(value1); +   trace_dump_writes("'>"); +} + + +static INLINE void  +trace_dump_tag_begin2(const char *name,  +                      const char *attr1, const char *value1, +                      const char *attr2, const char *value2) +{ +   trace_dump_writes("<"); +   trace_dump_writes(name); +   trace_dump_writes(" "); +   trace_dump_writes(attr1); +   trace_dump_writes("=\'"); +   trace_dump_escape(value1); +   trace_dump_writes("\' "); +   trace_dump_writes(attr2); +   trace_dump_writes("=\'"); +   trace_dump_escape(value2); +   trace_dump_writes("\'>"); +} + + +static INLINE void  +trace_dump_tag_begin3(const char *name,  +                      const char *attr1, const char *value1, +                      const char *attr2, const char *value2, +                      const char *attr3, const char *value3) +{ +   trace_dump_writes("<"); +   trace_dump_writes(name); +   trace_dump_writes(" "); +   
trace_dump_writes(attr1); +   trace_dump_writes("=\'"); +   trace_dump_escape(value1); +   trace_dump_writes("\' "); +   trace_dump_writes(attr2); +   trace_dump_writes("=\'"); +   trace_dump_escape(value2); +   trace_dump_writes("\' "); +   trace_dump_writes(attr3); +   trace_dump_writes("=\'"); +   trace_dump_escape(value3); +   trace_dump_writes("\'>"); +} + + +static INLINE void +trace_dump_tag_end(const char *name) +{ +   trace_dump_writes("</"); +   trace_dump_writes(name); +   trace_dump_writes(">"); +} + +static void  +trace_dump_trace_close(void) +{ +   if(stream) { +      trace_dump_writes("</trace>\n"); +      util_stream_close(stream); +      stream = NULL; +      refcount = 0; +   } +} + +boolean trace_dump_trace_begin() +{ +   const char *filename; +    +   filename = debug_get_option("GALLIUM_TRACE", NULL); +   if(!filename) +      return FALSE; +    +   if(!stream) { +    +      stream = util_stream_create(filename, 0); +      if(!stream) +         return FALSE; +       +      trace_dump_writes("<?xml version='1.0' encoding='UTF-8'?>\n"); +      trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); +      trace_dump_writes("<trace version='0.1'>\n"); +       +#if defined(PIPE_OS_LINUX) +      /* Linux applications rarely cleanup GL / Gallium resources so catch  +       * application exit here */  +      atexit(trace_dump_trace_close); +#endif +   } +    +   ++refcount; +    +   return TRUE; +} + +boolean trace_dump_enabled(void) +{ +   return stream ? TRUE : FALSE; +} + +void trace_dump_trace_end(void) +{ +   if(stream) +      if(!--refcount) +         trace_dump_trace_close(); +} + +void trace_dump_call_begin(const char *klass, const char *method) +{ +   trace_dump_indent(1); +   trace_dump_tag_begin2("call", "class", klass, "method", method); +   trace_dump_newline(); +} + +void trace_dump_call_end(void) +{ +   trace_dump_indent(1); +   trace_dump_tag_end("call"); +   trace_dump_newline(); +   util_stream_flush(stream); +} + +void trace_dump_arg_begin(const char *name) +{ +   trace_dump_indent(2); +   trace_dump_tag_begin1("arg", "name", name); +} + +void trace_dump_arg_end(void) +{ +   trace_dump_tag_end("arg"); +   trace_dump_newline(); +} + +void trace_dump_ret_begin(void) +{ +   trace_dump_indent(2); +   trace_dump_tag_begin("ret"); +} + +void trace_dump_ret_end(void) +{ +   trace_dump_tag_end("ret"); +   trace_dump_newline(); +} + +void trace_dump_bool(int value) +{ +   trace_dump_writef("<bool>%c</bool>", value ? 
'1' : '0'); +} + +void trace_dump_int(long long int value) +{ +   trace_dump_writef("<int>%lli</int>", value); +} + +void trace_dump_uint(long long unsigned value) +{ +   trace_dump_writef("<uint>%llu</uint>", value); +} + +void trace_dump_float(double value) +{ +   trace_dump_writef("<float>%g</float>", value); +} + +void trace_dump_bytes(const void *data, +                      long unsigned size) +{ +   static const char hex_table[16] = "0123456789ABCDEF"; +   const uint8_t *p = data; +   long unsigned i; +   trace_dump_writes("<bytes>"); +   for(i = 0; i < size; ++i) { +      uint8_t byte = *p++; +      char hex[2]; +      hex[0] = hex_table[byte >> 4]; +      hex[1] = hex_table[byte & 0xf]; +      trace_dump_write(hex, 2); +   } +   trace_dump_writes("</bytes>"); +} + +void trace_dump_string(const char *str) +{ +   trace_dump_writes("<string>"); +   trace_dump_escape(str); +   trace_dump_writes("</string>"); +} + +void trace_dump_enum(const char *value) +{ +   trace_dump_writes("<enum>"); +   trace_dump_escape(value); +   trace_dump_writes("</enum>"); +} + +void trace_dump_array_begin(void) +{ +   trace_dump_writes("<array>"); +} + +void trace_dump_array_end(void) +{ +   trace_dump_writes("</array>"); +} + +void trace_dump_elem_begin(void) +{ +   trace_dump_writes("<elem>"); +} + +void trace_dump_elem_end(void) +{ +   trace_dump_writes("</elem>"); +} + +void trace_dump_struct_begin(const char *name) +{ +   trace_dump_writef("<struct name='%s'>", name); +} + +void trace_dump_struct_end(void) +{ +   trace_dump_writes("</struct>"); +} + +void trace_dump_member_begin(const char *name) +{ +   trace_dump_writef("<member name='%s'>", name); +} + +void trace_dump_member_end(void) +{ +   trace_dump_writes("</member>"); +} + +void trace_dump_null(void) +{ +   trace_dump_writes("<null/>"); +} + +void trace_dump_ptr(const void *value) +{ +   if(value) +      trace_dump_writef("<ptr>0x%08lx</ptr>", (unsigned long)(uintptr_t)value); +   else +      trace_dump_null(); +} diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h new file mode 100644 index 0000000000..76a53731b3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.h @@ -0,0 +1,132 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @file + * Trace data dumping primitives. + */ + +#ifndef TR_DUMP_H +#define TR_DUMP_H + + +#include "pipe/p_compiler.h" + + +boolean trace_dump_trace_begin(void); +boolean trace_dump_enabled(void); +void trace_dump_trace_end(void); +void trace_dump_call_begin(const char *klass, const char *method); +void trace_dump_call_end(void); +void trace_dump_arg_begin(const char *name); +void trace_dump_arg_end(void); +void trace_dump_ret_begin(void); +void trace_dump_ret_end(void); +void trace_dump_bool(int value); +void trace_dump_int(long long int value); +void trace_dump_uint(long long unsigned value); +void trace_dump_float(double value); +void trace_dump_bytes(const void *data, long unsigned size); +void trace_dump_string(const char *str); +void trace_dump_enum(const char *value); +void trace_dump_array_begin(void); +void trace_dump_array_end(void); +void trace_dump_elem_begin(void); +void trace_dump_elem_end(void); +void trace_dump_struct_begin(const char *name); +void trace_dump_struct_end(void); +void trace_dump_member_begin(const char *name); +void trace_dump_member_end(void); +void trace_dump_null(void); +void trace_dump_ptr(const void *value); + + +/* + * Code saving macros.  + */ + +#define trace_dump_arg(_type, _arg) \ +   do { \ +      trace_dump_arg_begin(#_arg); \ +      trace_dump_##_type(_arg); \ +      trace_dump_arg_end(); \ +   } while(0) + +#define trace_dump_ret(_type, _arg) \ +   do { \ +      trace_dump_ret_begin(); \ +      trace_dump_##_type(_arg); \ +      trace_dump_ret_end(); \ +   } while(0) + +#define trace_dump_array(_type, _obj, _size) \ +   do { \ +      unsigned long idx; \ +      trace_dump_array_begin(); \ +      for(idx = 0; idx < (_size); ++idx) { \ +         trace_dump_elem_begin(); \ +         trace_dump_##_type((_obj)[idx]); \ +         trace_dump_elem_end(); \ +      } \ +      trace_dump_array_end(); \ +   } while(0) + +#define trace_dump_struct_array(_type, _obj, _size) \ +   do { \ +      unsigned long idx; \ +      trace_dump_array_begin(); \ +      for(idx = 0; idx < (_size); ++idx) { \ +         trace_dump_elem_begin(); \ +         trace_dump_##_type(&(_obj)[idx]); \ +         trace_dump_elem_end(); \ +      } \ +      trace_dump_array_end(); \ +   } while(0) + +#define trace_dump_member(_type, _obj, _member) \ +   do { \ +      trace_dump_member_begin(#_member); \ +      trace_dump_##_type((_obj)->_member); \ +      trace_dump_member_end(); \ +   } while(0) + +#define trace_dump_arg_array(_type, _arg, _size) \ +   do { \ +      trace_dump_arg_begin(#_arg); \ +      trace_dump_array(_type, _arg, _size); \ +      trace_dump_arg_end(); \ +   } while(0) + +#define trace_dump_member_array(_type, _obj, _member) \ +   do { \ +      trace_dump_member_begin(#_member); \ +      trace_dump_array(_type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \ +      trace_dump_member_end(); \ +   } while(0) + + +#endif /* TR_DUMP_H */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c new file mode 100644 index 0000000000..8789f86b1a --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.c @@ -0,0 +1,469 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
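
The trace_dump_arg macro in tr_dump.h above is purely textual; trace_dump_arg(uint, mode), for example, expands to the fragment below, which, given the tag writers in tr_dump.c, emits something like <arg name='mode'><uint>...</uint></arg> into the trace file:

   do {
      trace_dump_arg_begin("mode");   /* <arg name='mode'> */
      trace_dump_uint(mode);          /* <uint>...</uint>  */
      trace_dump_arg_end();           /* </arg>            */
   } while(0)
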
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_winsys.h" +#include "tr_texture.h" +#include "tr_screen.h" + + +static const char * +trace_screen_get_name(struct pipe_screen *_screen) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   const char *result; +    +   trace_dump_call_begin("pipe_screen", "get_name"); +    +   trace_dump_arg(ptr, screen); + +   result = screen->get_name(screen); +    +   trace_dump_ret(string, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static const char * +trace_screen_get_vendor(struct pipe_screen *_screen) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   const char *result; +    +   trace_dump_call_begin("pipe_screen", "get_vendor"); +    +   trace_dump_arg(ptr, screen); +   +   result = screen->get_vendor(screen); +    +   trace_dump_ret(string, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static int  +trace_screen_get_param(struct pipe_screen *_screen,  +                       int param) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   int result; +    +   trace_dump_call_begin("pipe_screen", "get_param"); +    +   trace_dump_arg(ptr, screen); +   trace_dump_arg(int, param); + +   result = screen->get_param(screen, param); +    +   trace_dump_ret(int, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static float  +trace_screen_get_paramf(struct pipe_screen *_screen,  +                        int param) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   float result; +    +   trace_dump_call_begin("pipe_screen", "get_paramf"); +    +   trace_dump_arg(ptr, screen); +   trace_dump_arg(int, param); + +   result = screen->get_paramf(screen, param); +    +   trace_dump_ret(float, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static boolean  +trace_screen_is_format_supported(struct pipe_screen *_screen, +                                 enum pipe_format format, +                                 enum pipe_texture_target target, +                                 unsigned 
tex_usage,  +                                 unsigned geom_flags) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   boolean result; +    +   trace_dump_call_begin("pipe_screen", "is_format_supported"); +    +   trace_dump_arg(ptr, screen); +   trace_dump_arg(format, format); +   trace_dump_arg(int, target); +   trace_dump_arg(uint, tex_usage); +   trace_dump_arg(uint, geom_flags); + +   result = screen->is_format_supported(screen, format, target, tex_usage, geom_flags); +    +   trace_dump_ret(bool, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static struct pipe_texture * +trace_screen_texture_create(struct pipe_screen *_screen, +                            const struct pipe_texture *templat) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   struct pipe_texture *result; +    +   trace_dump_call_begin("pipe_screen", "texture_create"); + +   trace_dump_arg(ptr, screen); +   trace_dump_arg(template, templat); + +   result = screen->texture_create(screen, templat); +    +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   result = trace_texture_create(tr_scr, result); +    +   return result; +} + + +static struct pipe_texture * +trace_screen_texture_blanket(struct pipe_screen *_screen, +                             const struct pipe_texture *templat, +                             const unsigned *ppitch, +                             struct pipe_buffer *buffer) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   unsigned pitch = *ppitch; +   struct pipe_texture *result; + +   trace_dump_call_begin("pipe_screen", "texture_blanket"); + +   trace_dump_arg(ptr, screen); +   trace_dump_arg(template, templat); +   trace_dump_arg(uint, pitch); +   trace_dump_arg(ptr, buffer); + +   result = screen->texture_blanket(screen, templat, ppitch, buffer); +    +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   result = trace_texture_create(tr_scr, result); +    +   return result; +} + + +static void  +trace_screen_texture_release(struct pipe_screen *_screen, +                             struct pipe_texture **ptexture) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   struct trace_texture *tr_tex; +   struct pipe_texture *texture; +    +   assert(ptexture); +   if(*ptexture) { +      tr_tex = trace_texture(tr_scr, *ptexture); +      texture = tr_tex->texture; +      assert(texture->screen == screen); +   } +   else +      texture = NULL; +    +   if (*ptexture) { +      if (!--(*ptexture)->refcount) { +         trace_dump_call_begin("pipe_screen", "texture_destroy"); +          +         trace_dump_arg(ptr, screen); +         trace_dump_arg(ptr, texture); +          +         trace_texture_destroy(tr_scr, *ptexture); +          +         trace_dump_call_end(); +      } +    +      *ptexture = NULL; +   } +} + + +static struct pipe_surface * +trace_screen_get_tex_surface(struct pipe_screen *_screen, +                             struct pipe_texture *texture, +                             unsigned face, unsigned level, +                             unsigned zslice, +                             unsigned usage) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   struct trace_texture *tr_tex; +   struct pipe_surface *result; +    +   
assert(texture); +   tr_tex = trace_texture(tr_scr, texture); +   texture = tr_tex->texture; +   assert(texture->screen == screen); +    +   trace_dump_call_begin("pipe_screen", "get_tex_surface"); +    +   trace_dump_arg(ptr, screen); +   trace_dump_arg(ptr, texture); +   trace_dump_arg(uint, face); +   trace_dump_arg(uint, level); +   trace_dump_arg(uint, zslice); +   trace_dump_arg(uint, usage); + +   result = screen->get_tex_surface(screen, texture, face, level, zslice, usage); + +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   result = trace_surface_create(tr_tex, result); + +   return result; +} + + +static void  +trace_screen_tex_surface_release(struct pipe_screen *_screen, +                                 struct pipe_surface **psurface) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   struct trace_texture *tr_tex; +   struct trace_surface *tr_surf; +   struct pipe_surface *surface; +    +   assert(psurface); +   if(*psurface) { +      tr_tex = trace_texture(tr_scr, (*psurface)->texture); +      tr_surf = trace_surface(tr_tex, *psurface); +      surface = tr_surf->surface; +   } +   else +      surface = NULL; +    +   if (*psurface) { +      if (!--(*psurface)->refcount) { +         trace_dump_call_begin("pipe_screen", "tex_surface_destroy"); +          +         trace_dump_arg(ptr, screen); +         trace_dump_arg(ptr, surface); + +         trace_surface_destroy(tr_tex, *psurface); + +         trace_dump_call_end(); +      } +    +      *psurface = NULL; +   } +} + + +static void * +trace_screen_surface_map(struct pipe_screen *_screen, +                         struct pipe_surface *surface, +                         unsigned flags) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   struct trace_texture *tr_tex; +   struct trace_surface *tr_surf; +   void *map; +    +   tr_tex = trace_texture(tr_scr, surface->texture); +   tr_surf = trace_surface(tr_tex, surface); +   surface = tr_surf->surface; + +   map = screen->surface_map(screen, surface, flags); +   if(map) { +      if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) { +         assert(!tr_surf->map); +         tr_surf->map = map; +      } +   } +    +   return map; +} + + +static void  +trace_screen_surface_unmap(struct pipe_screen *_screen, +                           struct pipe_surface *surface) +{ +   struct trace_screen *tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +   struct trace_texture *tr_tex; +   struct trace_surface *tr_surf; +    +   tr_tex = trace_texture(tr_scr, surface->texture); +   tr_surf = trace_surface(tr_tex, surface); +   surface = tr_surf->surface; +    +   if(tr_surf->map) { +      size_t size = surface->nblocksy * surface->stride; +       +      trace_dump_call_begin("pipe_winsys", "surface_write"); +       +      trace_dump_arg(ptr, screen); +       +      trace_dump_arg(ptr, surface); +       +      trace_dump_arg_begin("data"); +      trace_dump_bytes(tr_surf->map, size); +      trace_dump_arg_end(); + +      trace_dump_arg_begin("stride"); +      trace_dump_uint(surface->stride); +      trace_dump_arg_end(); + +      trace_dump_arg_begin("size"); +      trace_dump_uint(size); +      trace_dump_arg_end(); +    +      trace_dump_call_end(); + +      tr_surf->map = NULL; +   } + +   screen->surface_unmap(screen, surface); +} + + +static void +trace_screen_destroy(struct pipe_screen *_screen) +{ +   struct trace_screen 
*tr_scr = trace_screen(_screen); +   struct pipe_screen *screen = tr_scr->screen; +    +   trace_dump_call_begin("pipe_screen", "destroy"); +    +   trace_dump_arg(ptr, screen); + +   screen->destroy(screen); +    +   trace_dump_call_end(); + +   trace_dump_trace_end(); + +   FREE(tr_scr); +} + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen) +{ +   struct trace_screen *tr_scr; +   struct pipe_winsys *winsys; +    +   if(!screen) +      goto error1; + +   if(!trace_dump_trace_begin()) +      goto error1; + +   tr_scr = CALLOC_STRUCT(trace_screen); +   if(!tr_scr) +      goto error2; + +   winsys = trace_winsys_create(screen->winsys); +   if(!winsys) +      goto error3; +    +   tr_scr->base.winsys = winsys; +   tr_scr->base.destroy = trace_screen_destroy; +   tr_scr->base.get_name = trace_screen_get_name; +   tr_scr->base.get_vendor = trace_screen_get_vendor; +   tr_scr->base.get_param = trace_screen_get_param; +   tr_scr->base.get_paramf = trace_screen_get_paramf; +   tr_scr->base.is_format_supported = trace_screen_is_format_supported; +   tr_scr->base.texture_create = trace_screen_texture_create; +   tr_scr->base.texture_blanket = trace_screen_texture_blanket; +   tr_scr->base.texture_release = trace_screen_texture_release; +   tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; +   tr_scr->base.tex_surface_release = trace_screen_tex_surface_release; +   tr_scr->base.surface_map = trace_screen_surface_map; +   tr_scr->base.surface_unmap = trace_screen_surface_unmap; +    +   tr_scr->screen = screen; + +   trace_dump_call_begin("", "pipe_screen_create"); +   trace_dump_arg_begin("winsys"); +   trace_dump_ptr(screen->winsys); +   trace_dump_arg_end(); +   trace_dump_ret(ptr, screen); +   trace_dump_call_end(); + +   return &tr_scr->base; + +error3: +   FREE(tr_scr); +error2: +   trace_dump_trace_end(); +error1: +   return screen; +} + + +struct trace_screen * +trace_screen(struct pipe_screen *screen) +{ +   assert(screen); +   assert(screen->destroy == trace_screen_destroy); +   return (struct trace_screen *)screen; +} diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h new file mode 100644 index 0000000000..93fefdb9a5 --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_SCREEN_H_ +#define TR_SCREEN_H_ + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +    +struct trace_screen +{ +   struct pipe_screen base; +    +   struct pipe_screen *screen; +}; + + +struct trace_screen * +trace_screen(struct pipe_screen *screen); + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen); + + +#ifdef __cplusplus +} +#endif + +#endif /* TR_SCREEN_H_ */ diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c new file mode 100644 index 0000000000..524f2d6194 --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.c @@ -0,0 +1,462 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" + +#include "tr_dump.h" +#include "tr_state.h" + + +void trace_dump_format(enum pipe_format format) +{ +   trace_dump_enum(pf_name(format) ); +} + + +void trace_dump_block(const struct pipe_format_block *block) +{ +   trace_dump_struct_begin("pipe_format_block"); +   trace_dump_member(uint, block, size); +   trace_dump_member(uint, block, width); +   trace_dump_member(uint, block, height); +   trace_dump_struct_end(); +} + + +void trace_dump_template(const struct pipe_texture *templat) +{ +   if(!templat) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_texture"); +    +   trace_dump_member(int, templat, target); +   trace_dump_member(format, templat, format); +    +   trace_dump_member_begin("width"); +   trace_dump_array(uint, templat->width, 1); +   trace_dump_member_end(); + +   trace_dump_member_begin("height"); +   trace_dump_array(uint, templat->height, 1); +   trace_dump_member_end(); + +   trace_dump_member_begin("depth"); +   trace_dump_array(uint, templat->depth, 1); +   trace_dump_member_end(); + +   trace_dump_member_begin("block"); +   trace_dump_block(&templat->block); +   trace_dump_member_end(); +    +   trace_dump_member(uint, templat, last_level); +   trace_dump_member(uint, templat, tex_usage); +    +   trace_dump_struct_end(); +} + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_rasterizer_state"); + +   trace_dump_member(bool, state, flatshade); +   trace_dump_member(bool, state, light_twoside); +   trace_dump_member(uint, state, front_winding); +   trace_dump_member(uint, state, cull_mode); +   trace_dump_member(uint, state, fill_cw); +   trace_dump_member(uint, state, fill_ccw); +   trace_dump_member(bool, state, offset_cw); +   trace_dump_member(bool, state, offset_ccw); +   trace_dump_member(bool, state, scissor); +   trace_dump_member(bool, state, poly_smooth); +   trace_dump_member(bool, state, poly_stipple_enable); +   trace_dump_member(bool, state, point_smooth); +   trace_dump_member(bool, state, point_sprite); +   trace_dump_member(bool, state, point_size_per_vertex); +   trace_dump_member(bool, state, multisample); +   trace_dump_member(bool, state, line_smooth); +   trace_dump_member(bool, state, line_stipple_enable); +   trace_dump_member(uint, state, line_stipple_factor); +   trace_dump_member(uint, state, line_stipple_pattern); +   trace_dump_member(bool, state, line_last_pixel); +   trace_dump_member(bool, state, bypass_clipping); +   trace_dump_member(bool, state, bypass_vs); +   trace_dump_member(bool, state, origin_lower_left); +   trace_dump_member(bool, state, flatshade_first); +   trace_dump_member(bool, state, gl_rasterization_rules); + +   trace_dump_member(float, state, line_width); +   trace_dump_member(float, state, point_size); +   trace_dump_member(float, state, point_size_min); +   trace_dump_member(float, state, point_size_max); +   trace_dump_member(float, state, offset_units); +   trace_dump_member(float, state, offset_scale); +    +   trace_dump_member_array(uint, state, sprite_coord_mode); +    +   trace_dump_struct_end(); +} + + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   
trace_dump_struct_begin("pipe_poly_stipple"); + +   trace_dump_member_begin("stipple"); +   trace_dump_array(uint, +                    state->stipple,  +                    Elements(state->stipple)); +   trace_dump_member_end(); +    +   trace_dump_struct_end(); +} + + +void trace_dump_viewport_state(const struct pipe_viewport_state *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_viewport_state"); + +   trace_dump_member_array(float, state, scale); +   trace_dump_member_array(float, state, translate); +    +   trace_dump_struct_end(); +} + + +void trace_dump_scissor_state(const struct pipe_scissor_state *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_scissor_state"); + +   trace_dump_member(uint, state, minx); +   trace_dump_member(uint, state, miny); +   trace_dump_member(uint, state, maxx); +   trace_dump_member(uint, state, maxy); + +   trace_dump_struct_end(); +} + + +void trace_dump_clip_state(const struct pipe_clip_state *state) +{ +   unsigned i; +    +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_clip_state"); + +   trace_dump_member_begin("ucp"); +   trace_dump_array_begin(); +   for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) { +      trace_dump_elem_begin(); +      trace_dump_array(float, state->ucp[i], 4); +      trace_dump_elem_end(); +   } +   trace_dump_array_end(); +   trace_dump_member_end(); + +   trace_dump_member(uint, state, nr); + +   trace_dump_struct_end(); +} + + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_constant_buffer"); + +   trace_dump_member(ptr, state, buffer); + +   trace_dump_struct_end(); +} + + +void trace_dump_shader_state(const struct pipe_shader_state *state) +{ +   static char str[8192]; + +   if(!state) { +      trace_dump_null(); +      return; +   } + +   tgsi_dump_str(state->tokens, 0, str, sizeof(str)); +    +   trace_dump_struct_begin("pipe_shader_state"); + +   trace_dump_member_begin("tokens"); +   trace_dump_string(str); +   trace_dump_member_end(); + +   trace_dump_struct_end(); +} + + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) +{ +   unsigned i; +    +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_depth_stencil_alpha_state"); + +   trace_dump_member_begin("depth"); +   trace_dump_struct_begin("pipe_depth_state"); +   trace_dump_member(bool, &state->depth, enabled); +   trace_dump_member(bool, &state->depth, writemask); +   trace_dump_member(uint, &state->depth, func); +   trace_dump_member(bool, &state->depth, occlusion_count); +   trace_dump_struct_end(); +   trace_dump_member_end(); +    +   trace_dump_member_begin("stencil"); +   trace_dump_array_begin(); +   for(i = 0; i < Elements(state->stencil); ++i) { +      trace_dump_elem_begin(); +      trace_dump_struct_begin("pipe_stencil_state"); +      trace_dump_member(bool, &state->stencil[i], enabled); +      trace_dump_member(uint, &state->stencil[i], func); +      trace_dump_member(uint, &state->stencil[i], fail_op); +      trace_dump_member(uint, &state->stencil[i], zpass_op); +      trace_dump_member(uint, &state->stencil[i], zfail_op); +      trace_dump_member(uint, &state->stencil[i], ref_value); +      trace_dump_member(uint, &state->stencil[i], valuemask); +      
trace_dump_member(uint, &state->stencil[i], writemask); +      trace_dump_struct_end(); +      trace_dump_elem_end(); +   } +   trace_dump_array_end(); +   trace_dump_member_end(); + +   trace_dump_member_begin("alpha"); +   trace_dump_struct_begin("pipe_alpha_state"); +   trace_dump_member(bool, &state->alpha, enabled); +   trace_dump_member(uint, &state->alpha, func); +   trace_dump_member(float, &state->alpha, ref_value); +   trace_dump_struct_end(); +   trace_dump_member_end(); + +   trace_dump_struct_end(); +} + + +void trace_dump_blend_state(const struct pipe_blend_state *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_blend_state"); + +   trace_dump_member(bool, state, blend_enable); + +   trace_dump_member(uint, state, rgb_func); +   trace_dump_member(uint, state, rgb_src_factor); +   trace_dump_member(uint, state, rgb_dst_factor); + +   trace_dump_member(uint, state, alpha_func); +   trace_dump_member(uint, state, alpha_src_factor); +   trace_dump_member(uint, state, alpha_dst_factor); + +   trace_dump_member(bool, state, logicop_enable); +   trace_dump_member(uint, state, logicop_func); + +   trace_dump_member(uint, state, colormask); +   trace_dump_member(bool, state, dither); + +   trace_dump_struct_end(); +} + + +void trace_dump_blend_color(const struct pipe_blend_color *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_blend_color"); + +   trace_dump_member_array(float, state, color); + +   trace_dump_struct_end(); +} + + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) +{ +   trace_dump_struct_begin("pipe_framebuffer_state"); + +   trace_dump_member(uint, state, width); +   trace_dump_member(uint, state, height); +   trace_dump_member(uint, state, nr_cbufs); +   trace_dump_member_array(ptr, state, cbufs); +   trace_dump_member(ptr, state, zsbuf); + +   trace_dump_struct_end(); +} + + +void trace_dump_sampler_state(const struct pipe_sampler_state *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_sampler_state"); + +   trace_dump_member(uint, state, wrap_s); +   trace_dump_member(uint, state, wrap_t); +   trace_dump_member(uint, state, wrap_r); +   trace_dump_member(uint, state, min_img_filter); +   trace_dump_member(uint, state, min_mip_filter); +   trace_dump_member(uint, state, mag_img_filter); +   trace_dump_member(bool, state, compare_mode); +   trace_dump_member(uint, state, compare_func); +   trace_dump_member(bool, state, normalized_coords); +   trace_dump_member(uint, state, prefilter); +   trace_dump_member(float, state, shadow_ambient); +   trace_dump_member(float, state, lod_bias); +   trace_dump_member(float, state, min_lod); +   trace_dump_member(float, state, max_lod); +   trace_dump_member_array(float, state, border_color); +   trace_dump_member(float, state, max_anisotropy); + +   trace_dump_struct_end(); +} + + +void trace_dump_surface(const struct pipe_surface *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_surface"); + +   trace_dump_member(format, state, format); +   trace_dump_member(uint, state, status); +   trace_dump_member(uint, state, clear_value); +   trace_dump_member(uint, state, width); +   trace_dump_member(uint, state, height); + +   trace_dump_member_begin("block"); +   trace_dump_block(&state->block); +   trace_dump_member_end(); +    +   trace_dump_member(uint, state, 
nblocksx); +   trace_dump_member(uint, state, nblocksy); +   trace_dump_member(uint, state, stride); +   trace_dump_member(uint, state, layout); +   trace_dump_member(uint, state, offset); +   trace_dump_member(uint, state, refcount); +   trace_dump_member(uint, state, usage); + +   trace_dump_member(ptr, state, texture); +   trace_dump_member(uint, state, face); +   trace_dump_member(uint, state, level); +   trace_dump_member(uint, state, zslice); + +   trace_dump_struct_end(); +} + + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_vertex_buffer"); + +   trace_dump_member(uint, state, stride); +   trace_dump_member(uint, state, max_index); +   trace_dump_member(uint, state, buffer_offset); +   trace_dump_member(ptr, state, buffer); + +   trace_dump_struct_end(); +} + + +void trace_dump_vertex_element(const struct pipe_vertex_element *state) +{ +   if(!state) { +      trace_dump_null(); +      return; +   } + +   trace_dump_struct_begin("pipe_vertex_element"); + +   trace_dump_member(uint, state, src_offset); + +   trace_dump_member(uint, state, vertex_buffer_index); +   trace_dump_member(uint, state, nr_components); +  +   trace_dump_member(format, state, src_format); + +   trace_dump_struct_end(); +} diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h new file mode 100644 index 0000000000..5ae533dc66 --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TR_STATE_H +#define TR_STATE_H + +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +void trace_dump_format(enum pipe_format format); + +void trace_dump_block(const struct pipe_format_block *block); + +void trace_dump_template(const struct pipe_texture *templat); + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); + +void trace_dump_viewport_state(const struct pipe_viewport_state *state); + +void trace_dump_scissor_state(const struct pipe_scissor_state *state); + +void trace_dump_clip_state(const struct pipe_clip_state *state); + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); + +void trace_dump_token(const struct tgsi_token *token); + +void trace_dump_shader_state(const struct pipe_shader_state *state); + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); + +void trace_dump_blend_state(const struct pipe_blend_state *state); + +void trace_dump_blend_color(const struct pipe_blend_color *state); + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state); + +void trace_dump_sampler_state(const struct pipe_sampler_state *state); + +void trace_dump_surface(const struct pipe_surface *state); + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); + +void trace_dump_vertex_element(const struct pipe_vertex_element *state); + + +#endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c new file mode 100644 index 0000000000..1cc4f0bd43 --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.c @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_hash_table.h" +#include "util/u_memory.h" + +#include "tr_screen.h" +#include "tr_texture.h" + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr,  +                     struct pipe_texture *texture) +{ +   struct trace_texture *tr_tex; +    +   if(!texture) +      goto error; +    +   assert(texture->screen == tr_scr->screen); +    +   tr_tex = CALLOC_STRUCT(trace_texture); +   if(!tr_tex) +      goto error; +    +   memcpy(&tr_tex->base, texture, sizeof(struct pipe_texture)); +   tr_tex->base.screen = &tr_scr->base; +   tr_tex->texture = texture; +    +   return &tr_tex->base; +    +error: +   pipe_texture_reference(&texture, NULL); +   return NULL; +} + + +void +trace_texture_destroy(struct trace_screen *tr_scr,  +                      struct pipe_texture *texture) +{ +   struct trace_texture *tr_tex = trace_texture(tr_scr, texture);  +   pipe_texture_reference(&tr_tex->texture, NULL); +   FREE(tr_tex); +} + + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex,  +                     struct pipe_surface *surface) +{ +   struct trace_surface *tr_surf; +    +   if(!surface) +      goto error; +    +   assert(surface->texture == tr_tex->texture); +    +   tr_surf = CALLOC_STRUCT(trace_surface); +   if(!tr_surf) +      goto error; +    +   memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface)); +    +   tr_surf->base.texture = NULL; +   pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base); +   tr_surf->surface = surface; + +   return &tr_surf->base; +    +error: +   pipe_surface_reference(&surface, NULL); +   return NULL; +} + + +void +trace_surface_destroy(struct trace_texture *tr_tex,  +                      struct pipe_surface *surface) +{ +   struct trace_surface *tr_surf = trace_surface(tr_tex, surface); +   pipe_texture_reference(&tr_surf->base.texture, NULL); +   pipe_surface_reference(&tr_surf->surface, NULL); +   FREE(tr_surf); +} + diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h new file mode 100644 index 0000000000..9e72edb8a3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_TEXTURE_H_ +#define TR_TEXTURE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "tr_screen.h" + + +struct trace_texture +{ +   struct pipe_texture base; + +   struct pipe_texture *texture; +}; + + +struct trace_surface +{ +   struct pipe_surface base; + +   struct pipe_surface *surface; +    +   void *map; +}; + + +static INLINE struct trace_texture * +trace_texture(struct trace_screen *tr_scr,  +              struct pipe_texture *texture) +{ +   if(!texture) +      return NULL; +   assert(texture->screen == &tr_scr->base); +   return (struct trace_texture *)texture; +} + + +static INLINE struct trace_surface * +trace_surface(struct trace_texture *tr_tex,  +              struct pipe_surface *surface) +{ +   if(!surface) +      return NULL; +   assert(surface->texture == &tr_tex->base); +   return (struct trace_surface *)surface; +} + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr,  +                     struct pipe_texture *texture); + +void +trace_texture_destroy(struct trace_screen *tr_scr,  +                      struct pipe_texture *texture); + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex,  +                     struct pipe_surface *surface); + +void +trace_surface_destroy(struct trace_texture *tr_tex, +                      struct pipe_surface *surface); + + +#endif /* TR_TEXTURE_H_ */ diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c new file mode 100644 index 0000000000..c4148fe810 --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -0,0 +1,450 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "util/u_hash_table.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" +#include "tr_winsys.h" + + +static unsigned trace_buffer_hash(void *buffer) +{ +   return (unsigned)(uintptr_t)buffer; +} + + +static int trace_buffer_compare(void *buffer1, void *buffer2) +{ +   return (char *)buffer2 - (char *)buffer1; +} + +                   +static const char * +trace_winsys_get_name(struct pipe_winsys *_winsys) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   const char *result; +    +   trace_dump_call_begin("pipe_winsys", "get_name"); +    +   trace_dump_arg(ptr, winsys); + +   result = winsys->get_name(winsys); +    +   trace_dump_ret(string, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static void  +trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, +                               struct pipe_surface *surface, +                               void *context_private) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; + +   assert(surface); +   if(surface->texture) { +      struct trace_screen *tr_scr = trace_screen(surface->texture->screen); +      struct trace_texture *tr_tex = trace_texture(tr_scr, surface->texture); +      struct trace_surface *tr_surf = trace_surface(tr_tex, surface); +      surface = tr_surf->surface; +   } +    +   trace_dump_call_begin("pipe_winsys", "flush_frontbuffer"); +    +   trace_dump_arg(ptr, winsys); +   trace_dump_arg(ptr, surface); +   /* XXX: hide, as there is nothing we can do with this +   trace_dump_arg(ptr, context_private); +   */ + +   winsys->flush_frontbuffer(winsys, surface, context_private); +    +   trace_dump_call_end(); +} + + +static struct pipe_buffer * +trace_winsys_surface_buffer_create(struct pipe_winsys *_winsys, +                                   unsigned width, unsigned height, +                                   enum pipe_format format, +                                   unsigned usage, +                                   unsigned *pstride) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   unsigned stride; +   struct pipe_buffer *result; +    +   trace_dump_call_begin("pipe_winsys", "surface_buffer_create"); +    +   trace_dump_arg(ptr, winsys); +   trace_dump_arg(uint, width); +   trace_dump_arg(uint, height); +   trace_dump_arg(format, format); +   trace_dump_arg(uint, usage); + +   result = winsys->surface_buffer_create(winsys, +                                          width, height, +                                          format, +                                          usage, +                                          pstride); +    +   stride = *pstride; +    +   trace_dump_arg(uint, stride); +    +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static struct pipe_buffer * +trace_winsys_buffer_create(struct pipe_winsys *_winsys,  +                           unsigned alignment,  +                           unsigned usage, +                           unsigned size) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   struct pipe_buffer *buffer; +    +   trace_dump_call_begin("pipe_winsys", "buffer_create"); +    +   trace_dump_arg(ptr, winsys); 
+   trace_dump_arg(uint, alignment); +   trace_dump_arg(uint, usage); +   trace_dump_arg(uint, size); + +   buffer = winsys->buffer_create(winsys, alignment, usage, size); +    +   trace_dump_ret(ptr, buffer); +    +   trace_dump_call_end(); + +   /* Zero the buffer to avoid dumping uninitialized memory */ +   if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) { +      void *map; +      map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); +      if(map) { +         memset(map, 0, buffer->size); +         winsys->buffer_unmap(winsys, buffer); +      } +   } +    +   return buffer; +} + + +static struct pipe_buffer * +trace_winsys_user_buffer_create(struct pipe_winsys *_winsys,  +                                void *data, +                                unsigned size) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   struct pipe_buffer *result; +    +   trace_dump_call_begin("pipe_winsys", "user_buffer_create"); +    +   trace_dump_arg(ptr, winsys); +   trace_dump_arg_begin("data"); +   trace_dump_bytes(data, size); +   trace_dump_arg_end(); +   trace_dump_arg(uint, size); + +   result = winsys->user_buffer_create(winsys, data, size); +    +   trace_dump_ret(ptr, result); +    +   trace_dump_call_end(); +    +   /* XXX: Mark the user buffers. (we should wrap pipe_buffers, but it is  +    * impossible to do so while texture-less surfaces are still around) */ +   if(result) { +      assert(!(result->usage & TRACE_BUFFER_USAGE_USER)); +      result->usage |= TRACE_BUFFER_USAGE_USER; +   } +    +   return result; +} + + +void +trace_winsys_user_buffer_update(struct pipe_winsys *_winsys,  +                                struct pipe_buffer *buffer) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   const void *map; +    +   if(buffer && buffer->usage & TRACE_BUFFER_USAGE_USER) { +      map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); +      if(map) { +         trace_dump_call_begin("pipe_winsys", "buffer_write"); +          +         trace_dump_arg(ptr, winsys); +          +         trace_dump_arg(ptr, buffer); +          +         trace_dump_arg_begin("data"); +         trace_dump_bytes(map, buffer->size); +         trace_dump_arg_end(); +       +         trace_dump_arg_begin("size"); +         trace_dump_uint(buffer->size); +         trace_dump_arg_end(); +       +         trace_dump_call_end(); +          +         winsys->buffer_unmap(winsys, buffer); +      } +   } +} + + +static void * +trace_winsys_buffer_map(struct pipe_winsys *_winsys,  +                        struct pipe_buffer *buffer, +                        unsigned usage) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   void *map; +    +   map = winsys->buffer_map(winsys, buffer, usage); +   if(map) { +      if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) { +         assert(!hash_table_get(tr_ws->buffer_maps, buffer)); +         hash_table_set(tr_ws->buffer_maps, buffer, map); +      } +   } +    +   return map; +} + + +static void +trace_winsys_buffer_unmap(struct pipe_winsys *_winsys,  +                          struct pipe_buffer *buffer) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   const void *map; +    +   map = hash_table_get(tr_ws->buffer_maps, buffer); +   if(map) { +      trace_dump_call_begin("pipe_winsys", "buffer_write"); +       +      
trace_dump_arg(ptr, winsys); +       +      trace_dump_arg(ptr, buffer); +       +      trace_dump_arg_begin("data"); +      trace_dump_bytes(map, buffer->size); +      trace_dump_arg_end(); + +      trace_dump_arg_begin("size"); +      trace_dump_uint(buffer->size); +      trace_dump_arg_end(); +    +      trace_dump_call_end(); + +      hash_table_remove(tr_ws->buffer_maps, buffer); +   } +    +   winsys->buffer_unmap(winsys, buffer); +} + + +static void +trace_winsys_buffer_destroy(struct pipe_winsys *_winsys, +                            struct pipe_buffer *buffer) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +    +   trace_dump_call_begin("pipe_winsys", "buffer_destroy"); +    +   trace_dump_arg(ptr, winsys); +   trace_dump_arg(ptr, buffer); + +   winsys->buffer_destroy(winsys, buffer); +    +   trace_dump_call_end(); +} + + +static void +trace_winsys_fence_reference(struct pipe_winsys *_winsys, +                             struct pipe_fence_handle **pdst, +                             struct pipe_fence_handle *src) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   struct pipe_fence_handle *dst = *pdst; +    +   trace_dump_call_begin("pipe_winsys", "fence_reference"); +    +   trace_dump_arg(ptr, winsys); +   trace_dump_arg(ptr, dst); +   trace_dump_arg(ptr, src); + +   winsys->fence_reference(winsys, pdst, src); +    +   trace_dump_call_end(); +} + + +static int +trace_winsys_fence_signalled(struct pipe_winsys *_winsys, +                             struct pipe_fence_handle *fence, +                             unsigned flag) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   int result; +    +   trace_dump_call_begin("pipe_winsys", "fence_signalled"); +    +   trace_dump_arg(ptr, winsys); +   trace_dump_arg(ptr, fence); +   trace_dump_arg(uint, flag); + +   result = winsys->fence_signalled(winsys, fence, flag); +    +   trace_dump_ret(int, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static int +trace_winsys_fence_finish(struct pipe_winsys *_winsys, +                          struct pipe_fence_handle *fence, +                          unsigned flag) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +   int result; +    +   trace_dump_call_begin("pipe_winsys", "fence_finish"); +    +   trace_dump_arg(ptr, winsys); +   trace_dump_arg(ptr, fence); +   trace_dump_arg(uint, flag); + +   result = winsys->fence_finish(winsys, fence, flag); +    +   trace_dump_ret(int, result); +    +   trace_dump_call_end(); +    +   return result; +} + + +static void +trace_winsys_destroy(struct pipe_winsys *_winsys) +{ +   struct trace_winsys *tr_ws = trace_winsys(_winsys); +   struct pipe_winsys *winsys = tr_ws->winsys; +    +   trace_dump_call_begin("pipe_winsys", "destroy"); +    +   trace_dump_arg(ptr, winsys); + +   /*  +   winsys->destroy(winsys);  +   */ +    +   trace_dump_call_end(); +    +   hash_table_destroy(tr_ws->buffer_maps); + +   FREE(tr_ws); +} + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys) +{ +   struct trace_winsys *tr_ws; +    +   if(!winsys) +      goto error1; +    +   tr_ws = CALLOC_STRUCT(trace_winsys); +   if(!tr_ws) +      goto error1; + +   tr_ws->base.destroy = trace_winsys_destroy; +   tr_ws->base.get_name = trace_winsys_get_name; +   tr_ws->base.flush_frontbuffer = 
trace_winsys_flush_frontbuffer; +   tr_ws->base.surface_buffer_create = trace_winsys_surface_buffer_create; +   tr_ws->base.buffer_create = trace_winsys_buffer_create; +   tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; +   tr_ws->base.buffer_map = trace_winsys_buffer_map; +   tr_ws->base.buffer_unmap = trace_winsys_buffer_unmap; +   tr_ws->base.buffer_destroy = trace_winsys_buffer_destroy; +   tr_ws->base.fence_reference = trace_winsys_fence_reference; +   tr_ws->base.fence_signalled = trace_winsys_fence_signalled; +   tr_ws->base.fence_finish = trace_winsys_fence_finish; +    +   tr_ws->winsys = winsys; + +   tr_ws->buffer_maps = hash_table_create(trace_buffer_hash,  +                                          trace_buffer_compare); +   if(!tr_ws->buffer_maps) +      goto error2; +    +   trace_dump_call_begin("", "pipe_winsys_create"); +   trace_dump_ret(ptr, winsys); +   trace_dump_call_end(); + +   return &tr_ws->base; +    +error2: +   FREE(tr_ws); +error1: +   return winsys; +} diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h new file mode 100644 index 0000000000..0fd2a40556 --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_WINSYS_H_ +#define TR_WINSYS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/internal/p_winsys_screen.h" + + +/** + * It often happens that new data is written directly to the user buffers  + * without mapping/unmapping. This flag marks user buffers, so that their  + * contents can be dumped before being used by the pipe context. 
+ */ +#define TRACE_BUFFER_USAGE_USER  (1 << 31) + + +struct hash_table; + + +struct trace_winsys +{ +   struct pipe_winsys base; +    +   struct pipe_winsys *winsys; +    +   struct hash_table *buffer_maps; +}; + + +static INLINE struct trace_winsys * +trace_winsys(struct pipe_winsys *winsys) +{ +   assert(winsys); +   return (struct trace_winsys *)winsys; +} + + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys); + + +void +trace_winsys_user_buffer_update(struct pipe_winsys *winsys,  +                                struct pipe_buffer *buffer); + + +#endif /* TR_WINSYS_H_ */ diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl new file mode 100644 index 0000000000..9cd621e7ab --- /dev/null +++ b/src/gallium/drivers/trace/trace.xsl @@ -0,0 +1,185 @@ +<?xml version="1.0"?> + +<!-- + +Copyright 2008 Tungsten Graphics, Inc. + +This program is free software: you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program.  If not, see <http://www.gnu.org/licenses/>. + +!--> + +<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + +	<xsl:output method="html" /> + +	<xsl:strip-space elements="*" /> + +	<xsl:template match="/trace"> +		<html> +			<head> +				<title>Gallium Trace</title> +			</head> +			<style> +				body { +					font-family: verdana, sans-serif; +					font-size: 11px; +					font-weight: normal; +					text-align : left; +				} + +				.fun { +					font-weight: bold; +				} + +				.var { +					font-style: italic; +				} + +				.typ { +					display: none; +				} + +				.lit { +					color: #0000ff; +				} + +				.ptr { +					color: #008000; +				} +			</style> +			<body> +				<ol class="calls"> +					<xsl:apply-templates/> +				</ol> +			</body> +		</html> +	</xsl:template> + +	<xsl:template match="call"> +		<li> +			<span class="fun"> +				<xsl:value-of select="@class"/> +				<xsl:text>::</xsl:text> +				<xsl:value-of select="@method"/> +			</span> +			<xsl:text>(</xsl:text> +			<xsl:apply-templates select="arg"/> +			<xsl:text>)</xsl:text> +			<xsl:apply-templates select="ret"/> +		</li> +	</xsl:template> + +	<xsl:template match="arg|member"> +			<xsl:apply-templates select="@name"/> +			<xsl:text> = </xsl:text> +			<xsl:apply-templates /> +			<xsl:if test="position() != last()"> +				<xsl:text>, </xsl:text> +			</xsl:if> +	</xsl:template> + +	<xsl:template match="ret"> +		<xsl:text> = </xsl:text> +		<xsl:apply-templates /> +	</xsl:template> + +	<xsl:template match="bool|int|uint|float|enum"> +		<span class="lit"> +			<xsl:value-of select="text()"/> +		</span> +	</xsl:template> + +	<xsl:template match="bytes"> +		<span class="lit"> +			<xsl:text>...</xsl:text> +		</span> +	</xsl:template> + +	<xsl:template match="string"> +		<span class="lit"> +			<xsl:text>"</xsl:text> +			<xsl:call-template name="break"> +				<xsl:with-param name="text" select="text()"/> +			</xsl:call-template> +			<xsl:text>"</xsl:text> +		</span> +	</xsl:template> + +	<xsl:template match="array|struct"> +		<xsl:text>{</xsl:text> +		
<xsl:apply-templates /> +		<xsl:text>}</xsl:text> +	</xsl:template> + +	<xsl:template match="elem"> +		<xsl:apply-templates /> +		<xsl:if test="position() != last()"> +			<xsl:text>, </xsl:text> +		</xsl:if> +	</xsl:template> + +	<xsl:template match="null"> +		<span class="ptr"> +			<xsl:text>NULL</xsl:text> +		</span> +	</xsl:template> + +	<xsl:template match="ptr"> +		<span class="ptr"> +			<xsl:value-of select="text()"/> +		</span> +	</xsl:template> + +	<xsl:template match="@name"> +		<span class="var"> +			<xsl:value-of select="."/> +		</span> +	</xsl:template> +	 +	<xsl:template name="break"> +		<xsl:param name="text" select="."/> +		<xsl:choose> +			<xsl:when test="contains($text, '
')"> +				<xsl:value-of select="substring-before($text, '
')"/> +				<br/> +				<xsl:call-template name="break"> +					 <xsl:with-param name="text" select="substring-after($text, '
')"/> +				</xsl:call-template> +			</xsl:when> +			<xsl:otherwise> +				<xsl:value-of select="$text"/> +			</xsl:otherwise> +		</xsl:choose> +	</xsl:template> + +	<xsl:template name="replace"> +		<xsl:param name="text"/> +		<xsl:param name="from"/> +		<xsl:param name="to"/> +		<xsl:choose> +			<xsl:when test="contains($text,$from)"> +				<xsl:value-of select="concat(substring-before($text,$from),$to)"/> +				<xsl:call-template name="replace"> +					<xsl:with-param name="text" select="substring-after($text,$from)"/> +					<xsl:with-param name="from" select="$from"/> +					<xsl:with-param name="to" select="$to"/> +				</xsl:call-template> +			</xsl:when> +			<xsl:otherwise> +				<xsl:value-of select="$text"/> +			</xsl:otherwise> +		</xsl:choose> +	</xsl:template> + +</xsl:transform>  | 

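The trace sources above all follow the same wrap-and-delegate pattern: each trace_* object exposes a "base" struct of function pointers that callers see, keeps a pointer to the real object it wraps, logs the call and its arguments with the trace_dump_* helpers, and then forwards the call to the wrapped implementation. The stand-alone C sketch below illustrates that pattern outside of Gallium; the "toy_screen" interface and every name in it are invented for this example only and are not part of the pipe driver API.

/*
 * Illustrative sketch only: a self-contained program showing the
 * wrap-and-delegate pattern used by the trace driver.  All types and
 * names here (toy_screen, trace_toy_screen, ...) are hypothetical.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* A tiny "driver" interface, loosely analogous to pipe_screen. */
struct toy_screen {
   const char *(*get_name)(struct toy_screen *screen);
   void (*destroy)(struct toy_screen *screen);
};

/* The wrapper: the base struct callers see, plus the wrapped screen. */
struct trace_toy_screen {
   struct toy_screen base;      /* what callers see */
   struct toy_screen *screen;   /* the real, wrapped screen */
};

static const char *
trace_toy_get_name(struct toy_screen *_screen)
{
   struct trace_toy_screen *tr = (struct trace_toy_screen *)_screen;
   const char *result;

   printf("<call class='toy_screen' method='get_name'>\n");
   result = tr->screen->get_name(tr->screen);   /* forward to the real screen */
   printf("  <ret><string>%s</string></ret>\n</call>\n", result);
   return result;
}

static void
trace_toy_destroy(struct toy_screen *_screen)
{
   struct trace_toy_screen *tr = (struct trace_toy_screen *)_screen;

   printf("<call class='toy_screen' method='destroy'/>\n");
   tr->screen->destroy(tr->screen);             /* forward, then free the wrapper */
   free(tr);
}

/* Wrap on success; on allocation failure return the unwrapped screen so
 * the caller still gets something usable (tracing degrades gracefully). */
static struct toy_screen *
trace_toy_screen_create(struct toy_screen *screen)
{
   struct trace_toy_screen *tr;

   if (!screen)
      return NULL;

   tr = calloc(1, sizeof *tr);
   if (!tr)
      return screen;

   tr->base.get_name = trace_toy_get_name;
   tr->base.destroy = trace_toy_destroy;
   tr->screen = screen;
   return &tr->base;
}

/* A trivial "real" implementation so the example runs. */
static const char *real_get_name(struct toy_screen *s) { (void)s; return "toy"; }
static void real_destroy(struct toy_screen *s) { (void)s; }

int main(void)
{
   struct toy_screen real = { real_get_name, real_destroy };
   struct toy_screen *s = trace_toy_screen_create(&real);

   assert(s);
   s->get_name(s);   /* logged, then forwarded to the real screen */
   s->destroy(s);    /* logged, forwarded, wrapper freed */
   return 0;
}

The same shape appears in trace_screen_create() and trace_winsys_create() above, including the fall-back to the unwrapped object on allocation failure. The XML-like records emitted by the real driver's trace_dump_* calls are what trace.xsl renders as HTML, for example by running the dump through an XSLT processor such as xsltproc.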